@@ -16,44 +16,44 @@ struct quant_option {
 };
 
 static const std::vector<struct quant_option> QUANT_OPTIONS = {
-    { "Q4_0",   LLAMA_FTYPE_MOSTLY_Q4_0,   " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
-    { "Q4_1",   LLAMA_FTYPE_MOSTLY_Q4_1,   " 4.78G, +0.4511 ppl @ Llama-3-8B",  },
-    { "Q5_0",   LLAMA_FTYPE_MOSTLY_Q5_0,   " 5.21G, +0.1316 ppl @ Llama-3-8B",  },
-    { "Q5_1",   LLAMA_FTYPE_MOSTLY_Q5_1,   " 5.65G, +0.1062 ppl @ Llama-3-8B",  },
-    { "IQ2_XXS",LLAMA_FTYPE_MOSTLY_IQ2_XXS," 2.06 bpw quantization",            },
-    { "IQ2_XS", LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization",            },
-    { "IQ2_S",  LLAMA_FTYPE_MOSTLY_IQ2_S,  " 2.5 bpw quantization",             },
-    { "IQ2_M",  LLAMA_FTYPE_MOSTLY_IQ2_M,  " 2.7 bpw quantization",             },
-    { "IQ1_S",  LLAMA_FTYPE_MOSTLY_IQ1_S,  " 1.56 bpw quantization",            },
-    { "IQ1_M",  LLAMA_FTYPE_MOSTLY_IQ1_M,  " 1.75 bpw quantization",            },
-    { "Q2_K",   LLAMA_FTYPE_MOSTLY_Q2_K,   " 2.96G, +3.5199 ppl @ Llama-3-8B",  },
-    { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B",  },
-    { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization",            },
-    { "IQ3_S",  LLAMA_FTYPE_MOSTLY_IQ3_S,  " 3.44 bpw quantization",            },
-    { "IQ3_M",  LLAMA_FTYPE_MOSTLY_IQ3_M,  " 3.66 bpw quantization mix",        },
-    { "Q3_K",   LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
-    { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization",             },
-    { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B",  },
-    { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G, +0.6569 ppl @ Llama-3-8B",  },
-    { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G, +0.5562 ppl @ Llama-3-8B",  },
-    { "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization", },
-    { "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization", },
-    { "Q4_K",   LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M",                 },
-    { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G, +0.2689 ppl @ Llama-3-8B",  },
-    { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G, +0.1754 ppl @ Llama-3-8B",  },
-    { "Q5_K",   LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M",                 },
-    { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G, +0.1049 ppl @ Llama-3-8B",  },
-    { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B",  },
-    { "Q6_K",   LLAMA_FTYPE_MOSTLY_Q6_K,   " 6.14G, +0.0217 ppl @ Llama-3-8B",  },
-    { "Q8_0",   LLAMA_FTYPE_MOSTLY_Q8_0,   " 7.96G, +0.0026 ppl @ Llama-3-8B",  },
-    { "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
-    { "Q4_0_4_8", LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
-    { "Q4_0_8_8", LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
-    { "F16",    LLAMA_FTYPE_MOSTLY_F16,    "14.00G, +0.0020 ppl @ Mistral-7B",  },
-    { "BF16",   LLAMA_FTYPE_MOSTLY_BF16,   "14.00G, -0.0050 ppl @ Mistral-7B",  },
-    { "F32",    LLAMA_FTYPE_ALL_F32,       "26.00G @ 7B",                       },
+    { "Q4_0",     LLAMA_FTYPE_MOSTLY_Q4_0,     " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
+    { "Q4_1",     LLAMA_FTYPE_MOSTLY_Q4_1,     " 4.78G, +0.4511 ppl @ Llama-3-8B",  },
+    { "Q5_0",     LLAMA_FTYPE_MOSTLY_Q5_0,     " 5.21G, +0.1316 ppl @ Llama-3-8B",  },
+    { "Q5_1",     LLAMA_FTYPE_MOSTLY_Q5_1,     " 5.65G, +0.1062 ppl @ Llama-3-8B",  },
+    { "IQ2_XXS",  LLAMA_FTYPE_MOSTLY_IQ2_XXS,  " 2.06 bpw quantization",            },
+    { "IQ2_XS",   LLAMA_FTYPE_MOSTLY_IQ2_XS,   " 2.31 bpw quantization",            },
+    { "IQ2_S",    LLAMA_FTYPE_MOSTLY_IQ2_S,    " 2.5 bpw quantization",             },
+    { "IQ2_M",    LLAMA_FTYPE_MOSTLY_IQ2_M,    " 2.7 bpw quantization",             },
+    { "IQ1_S",    LLAMA_FTYPE_MOSTLY_IQ1_S,    " 1.56 bpw quantization",            },
+    { "IQ1_M",    LLAMA_FTYPE_MOSTLY_IQ1_M,    " 1.75 bpw quantization",            },
+    { "Q2_K",     LLAMA_FTYPE_MOSTLY_Q2_K,     " 2.96G, +3.5199 ppl @ Llama-3-8B",  },
+    { "Q2_K_S",   LLAMA_FTYPE_MOSTLY_Q2_K_S,   " 2.96G, +3.1836 ppl @ Llama-3-8B",  },
+    { "IQ3_XXS",  LLAMA_FTYPE_MOSTLY_IQ3_XXS,  " 3.06 bpw quantization",            },
+    { "IQ3_S",    LLAMA_FTYPE_MOSTLY_IQ3_S,    " 3.44 bpw quantization",            },
+    { "IQ3_M",    LLAMA_FTYPE_MOSTLY_IQ3_M,    " 3.66 bpw quantization mix",        },
+    { "Q3_K",     LLAMA_FTYPE_MOSTLY_Q3_K_M,   " alias for Q3_K_M" },
+    { "IQ3_XS",   LLAMA_FTYPE_MOSTLY_IQ3_XS,   " 3.3 bpw quantization",             },
+    { "Q3_K_S",   LLAMA_FTYPE_MOSTLY_Q3_K_S,   " 3.41G, +1.6321 ppl @ Llama-3-8B",  },
+    { "Q3_K_M",   LLAMA_FTYPE_MOSTLY_Q3_K_M,   " 3.74G, +0.6569 ppl @ Llama-3-8B",  },
+    { "Q3_K_L",   LLAMA_FTYPE_MOSTLY_Q3_K_L,   " 4.03G, +0.5562 ppl @ Llama-3-8B",  },
+    { "IQ4_NL",   LLAMA_FTYPE_MOSTLY_IQ4_NL,   " 4.50 bpw non-linear quantization", },
+    { "IQ4_XS",   LLAMA_FTYPE_MOSTLY_IQ4_XS,   " 4.25 bpw non-linear quantization", },
+    { "Q4_K",     LLAMA_FTYPE_MOSTLY_Q4_K_M,   " alias for Q4_K_M",                 },
+    { "Q4_K_S",   LLAMA_FTYPE_MOSTLY_Q4_K_S,   " 4.37G, +0.2689 ppl @ Llama-3-8B",  },
+    { "Q4_K_M",   LLAMA_FTYPE_MOSTLY_Q4_K_M,   " 4.58G, +0.1754 ppl @ Llama-3-8B",  },
+    { "Q5_K",     LLAMA_FTYPE_MOSTLY_Q5_K_M,   " alias for Q5_K_M",                 },
+    { "Q5_K_S",   LLAMA_FTYPE_MOSTLY_Q5_K_S,   " 5.21G, +0.1049 ppl @ Llama-3-8B",  },
+    { "Q5_K_M",   LLAMA_FTYPE_MOSTLY_Q5_K_M,   " 5.33G, +0.0569 ppl @ Llama-3-8B",  },
+    { "Q6_K",     LLAMA_FTYPE_MOSTLY_Q6_K,     " 6.14G, +0.0217 ppl @ Llama-3-8B",  },
+    { "Q8_0",     LLAMA_FTYPE_MOSTLY_Q8_0,     " 7.96G, +0.0026 ppl @ Llama-3-8B",  },
+    { "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
+    { "Q4_0_4_8", LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
+    { "Q4_0_8_8", LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
+    { "F16",      LLAMA_FTYPE_MOSTLY_F16,      "14.00G, +0.0020 ppl @ Mistral-7B",  },
+    { "BF16",     LLAMA_FTYPE_MOSTLY_BF16,     "14.00G, -0.0050 ppl @ Mistral-7B",  },
+    { "F32",      LLAMA_FTYPE_ALL_F32,         "26.00G @ 7B",                       },
     // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching.
-    { "COPY",   LLAMA_FTYPE_ALL_F32,       " only copy tensors, no quantizing", },
+    { "COPY",     LLAMA_FTYPE_ALL_F32,         " only copy tensors, no quantizing", },
 };
 
 static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = "quantize.imatrix.file";
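For context on the "COPY comes after F32" note: the quantize tool also accepts a numeric ftype on the command line, and a bare number is resolved by a first-match scan over QUANT_OPTIONS. Since F32 and COPY share the same underlying value (LLAMA_FTYPE_ALL_F32, i.e. 0), whichever entry is listed first is the one "0" resolves to. Below is a minimal, self-contained sketch of that ordering hazard; the `option` struct and `resolve_ftype` helper are illustrative stand-ins, not the tool's actual parser.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Illustrative stand-in for quant_option: just a name and an ftype value.
struct option { std::string name; int ftype; };

// Same relative ordering as QUANT_OPTIONS above: F32 before COPY, both 0.
static const std::vector<option> options = {
    { "F32",  0 },  // LLAMA_FTYPE_ALL_F32
    { "COPY", 0 },  // shares ftype 0, so it must come after F32
};

// First entry whose ftype matches wins -- hence the ordering note.
static const option * resolve_ftype(int ftype) {
    for (const auto & opt : options) {
        if (opt.ftype == ftype) {
            return &opt;
        }
    }
    return nullptr;
}

int main() {
    // With F32 listed first, a user-supplied "0" resolves to F32.
    // Swap the two entries and the same input would resolve to COPY.
    if (const option * opt = resolve_ftype(0)) {
        std::printf("ftype 0 -> %s\n", opt->name.c_str());
    }
    return 0;
}
```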