build fixes

slaren · slaren · commit 28b3b760aab3 · 2024-11-12T01:58:30.000+01:00
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
@@ -29,37 +29,6 @@ endif()
 unset(GGML_EXTRA_LIBS_PRIVATE)
 unset(GGML_EXTRA_LIBS_PUBLIC)
 
-# musa, hip: add directory with a CMakeLists.txt file, but no source files (use refer to ggml-cuda files as ../ggml-cuda)
-if (GGML_MUSA)
-    list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/")
-    find_package(MUSAToolkit)
-    set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
-else()
-    find_package(CUDAToolkit)
-endif()
-
-# if (GGML_MUSA)
-# set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
-# else()
-# if (GGML_MUSA)
-# set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
-# foreach(SOURCE ${GGML_SOURCES_CUDA})
-#     set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
-# endforeach()
-# endif()
-
-
-if (GGML_MUSA)
-    set(CMAKE_C_COMPILER clang)
-    set(CMAKE_C_EXTENSIONS OFF)
-    set(CMAKE_CXX_COMPILER clang++)
-    set(CMAKE_CXX_EXTENSIONS OFF)
-
-    set(GGML_CUDA ON)
-
-    list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA)
-endif()
-
 if (GGML_AMX)
     if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
     else()
@@ -597,12 +566,6 @@ function(get_flags CCID CCVER)
     elseif (CCID STREQUAL "GNU")
         set(C_FLAGS   -Wdouble-promotion)
         set(CXX_FLAGS -Wno-array-bounds)
-
-        if (NOT GGML_MUSA)
-            if (CCVER VERSION_GREATER_EQUAL 7.1.0)
-                list(APPEND CXX_FLAGS -Wno-format-truncation)
-            endif()
-        endif()
         if (CCVER VERSION_GREATER_EQUAL 8.1.0)
             list(APPEND CXX_FLAGS -Wextra-semi)
         endif()
@@ -779,18 +742,16 @@ add_library(ggml-base STATIC
             ggml-backend.cpp
             ggml-threading.cpp
             ggml-threading.h
-            ggml-quants.c # for quantize functions TODO: move dot fns to a separate file
+            ggml-quants.c
             ggml-quants.h
             ggml-aarch64.c
             ggml-aarch64.h
-
-)
-
-add_subdirectory(ggml-cpu)
+            )
 
 add_library(ggml
             ggml-backend-reg.cpp
-)
+            )
+add_subdirectory(ggml-cpu)
 
 target_link_libraries(ggml PUBLIC ggml-base ggml-cpu)
 
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
@@ -1,5 +1,7 @@
-#include "ggml-backend.h"
 #include "ggml-backend-impl.h"
+#include "ggml-backend.h"
+#include "ggml-cpu.h"
+#include "ggml-impl.h"
 #include <cstring>
 #include <vector>
 
@@ -45,8 +47,6 @@
 #include "ggml-kompute.h"
 #endif
 
-#include "ggml-cpu.h"
-
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c
@@ -10783,22 +10783,6 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
 #endif
 }
 
-//
-// ============================================= 3-bit using D4 lattice
-//
-
-void quantize_row_iq3_xxs(const float * restrict x, void * restrict vy, int64_t k) {
-    assert(k % QK_K == 0);
-    block_iq3_xxs * restrict y = vy;
-    quantize_row_iq3_xxs_ref(x, y, k);
-}
-
-void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int64_t k) {
-    assert(k % QK_K == 0);
-    block_iq3_s * restrict y = vy;
-    quantize_row_iq3_s_ref(x, y, k);
-}
-
 // ============================ 4-bit non-linear quants
 
 void quantize_row_iq4_nl(const float * restrict x, void * restrict y, int64_t k) {
@@ -10810,10 +10794,3 @@ void quantize_row_iq4_xs(const float * restrict x, void * restrict y, int64_t k)
     assert(k % QK_K == 0);
     quantize_iq4_xs(x, y, 1, k, NULL);
 }
-
-// =============================== 2.5625 bpw
-
-void quantize_row_iq2_s(const float * restrict x, void * restrict y, int64_t k) {
-    assert(k % QK_K == 0);
-    quantize_iq2_s(x, y, 1, k, NULL);
-}
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.h b/ggml/src/ggml-cpu/ggml-cpu-quants.h
@@ -29,11 +29,8 @@ void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
 void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 
-void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
-void quantize_row_iq3_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
-void quantize_row_iq2_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 
 // Dot product
 void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -356,19 +356,20 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ3_XXS] = {
-        .from_float               = quantize_row_iq3_xxs,
+        // NOTE: from_float for iq3_xxs, iq3_s and iq2_s was removed because these quants require initialization in ggml_quantize_init
+        //.from_float               = quantize_row_iq3_xxs,
         .vec_dot                  = ggml_vec_dot_iq3_xxs_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ3_S] = {
-        .from_float               = quantize_row_iq3_s,
+        //.from_float               = quantize_row_iq3_s,
         .vec_dot                  = ggml_vec_dot_iq3_s_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ2_S] = {
-        .from_float               = quantize_row_iq2_s,
+        //.from_float               = quantize_row_iq2_s,
         .vec_dot                  = ggml_vec_dot_iq2_s_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -11,6 +11,14 @@
 #include <sys/sysctl.h>
 #endif
 
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+    #define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
 // ggml-backend interface
 
 #ifdef GGML_USE_CPU_HBM
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h