diff --git a/common/arg.cpp b/common/arg.cpp index 9f87e9910b540..73a3cfe5392c0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2627,6 +2627,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.i_chunk = value; } ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--parse-special"}, + string_format("parse special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"), + [](common_params & params) { + params.parse_special = true; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); add_opt(common_arg( {"-pps"}, string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"), diff --git a/common/common.h b/common/common.h index 90702245463cb..d051d4ec971c4 100644 --- a/common/common.h +++ b/common/common.h @@ -409,6 +409,7 @@ struct common_params { bool process_output = false; // collect data for the output tensor bool compute_ppl = true; // whether to compute perplexity + bool parse_special = false; // whether to parse special tokens during imatrix tokenization // cvector-generator params int n_pca_batch = 100; diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 2c39278dba3d9..81d0404d683d5 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) { LOG("\n %s \\\n" " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n" " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n" - " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]); + " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] 
\\\n" + " [--parse-special]\n" , argv[0]); LOG("\n"); } @@ -439,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) { auto tim1 = std::chrono::high_resolution_clock::now(); LOG_INF("%s: tokenizing the input ..\n", __func__); - std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true); + std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special); auto tim2 = std::chrono::high_resolution_clock::now(); LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());