Skip to content

Commit 8ab8ba6

Browse files
ikawrakow (Kawrakow)
and co-author authored
llama : prevent usage of k-quants when tensor size is not a multiple of 256 (#1921)
* Fix examples/metal

* k-quants: prevent usage when tensor size is not divisible by 256

---------

Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 90cc59d commit 8ab8ba6

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

llama.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
#ifdef GGML_USE_METAL
2020
#include "ggml-metal.h"
2121
#endif
22+
#ifdef GGML_USE_K_QUANTS
23+
#ifndef QK_K
24+
#define QK_K 256
25+
#endif
26+
#endif
2227

2328
#include <array>
2429
#include <ctime>
@@ -2491,6 +2496,17 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
24912496
} else {
24922497
new_type = quantized_type;
24932498
#ifdef GGML_USE_K_QUANTS
2499+
if (quantized_type == GGML_TYPE_Q2_K || quantized_type == GGML_TYPE_Q3_K || quantized_type == GGML_TYPE_Q4_K ||
2500+
quantized_type == GGML_TYPE_Q5_K || quantized_type == GGML_TYPE_Q6_K) {
2501+
int nx = tensor.ne.at(0);
2502+
int ny = tensor.ne.at(1);
2503+
if (nx % QK_K != 0 || ny % QK_K != 0) {
2504+
fprintf(stderr, "\n\n========================= Tensor sizes %d x %d are not divisible by %d\n",nx,ny,QK_K);
2505+
fprintf(stderr, "This is required to be able to use k-quants for now!\n");
2506+
fprintf(stderr, "========================================================================================\n\n");
2507+
throw std::runtime_error("Unsupported tensor size encountered\n");
2508+
}
2509+
}
24942510
if (tensor.name == "output.weight") {
24952511
new_type = GGML_TYPE_Q6_K;
24962512
} else if (tensor.name.find("attention.wv.weight") != std::string::npos) {

0 commit comments

Comments
 (0)