
Commit 1c217ac

ericcurtin authored and jpohhhh committed
Add CLI arg to llama-run to adjust the number of threads used (#12370)

We default to 4; sometimes we want to manually adjust this.

Signed-off-by: Eric Curtin <[email protected]>
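
With this change the thread count can be set from the command line, for example (the model file name below is only a placeholder, in the style of the examples in the built-in help):

    llama-run --threads 8 some-file.gguf
    llama-run -t 8 some-file.gguf Hello World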
1 parent c17bce7 commit 1c217ac


examples/run/run.cpp

Lines changed: 89 additions & 44 deletions
@@ -79,6 +79,7 @@ class Opt {
         ctx_params           = llama_context_default_params();
         model_params         = llama_model_default_params();
         context_size_default = ctx_params.n_batch;
+        n_threads_default    = ctx_params.n_threads;
         ngl_default          = model_params.n_gpu_layers;
         common_params_sampling sampling;
         temperature_default = sampling.temp;
@@ -104,6 +105,7 @@ class Opt {
 
         ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
         ctx_params.n_ctx          = ctx_params.n_batch;
+        ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
         model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
         temperature               = temperature >= 0 ? temperature : temperature_default;
 
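
Aside (not shown in this diff): llama_context_params carries two thread counts, n_threads for single-token generation and n_threads_batch for prompt/batch processing, which is why the added line assigns the same value to both. Illustratively:

    // Illustrative only: the two thread knobs on llama_context_params.
    llama_context_params params = llama_context_default_params();
    params.n_threads       = 8; // threads used while generating tokens one at a time
    params.n_threads_batch = 8; // threads used while processing the prompt in batches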
@@ -116,12 +118,12 @@ class Opt {
     std::string chat_template_file;
     std::string user;
     bool        use_jinja = false;
-    int         context_size = -1, ngl = -1;
+    int         context_size = -1, ngl = -1, n_threads = -1;
     float       temperature = -1;
     bool        verbose     = false;
 
   private:
-    int   context_size_default = -1, ngl_default = -1;
+    int   context_size_default = -1, ngl_default = -1, n_threads_default = -1;
     float temperature_default = -1;
     bool  help                = false;
 
@@ -159,53 +161,94 @@ class Opt {
         return 0;
     }
 
+    int parse_options_with_value(int argc, const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
+            if (handle_option_with_value(argc, argv, i, context_size) == 1) {
+                return 1;
+            }
+        } else if (options_parsing &&
+                   (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
+            if (handle_option_with_value(argc, argv, i, ngl) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
+            if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
+            if (handle_option_with_value(argc, argv, i, temperature) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
+            if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
+                return 1;
+            }
+            use_jinja = true;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_options(const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
+            verbose = true;
+        } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
+            use_jinja = true;
+        } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
+            help = true;
+            return 0;
+        } else if (options_parsing && strcmp(argv[i], "--") == 0) {
+            options_parsing = false;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_positional_args(const char ** argv, int & i, int & positional_args_i) {
+        if (positional_args_i == 0) {
+            if (!argv[i][0] || argv[i][0] == '-') {
+                return 1;
+            }
+
+            ++positional_args_i;
+            model_ = argv[i];
+        } else if (positional_args_i == 1) {
+            ++positional_args_i;
+            user = argv[i];
+        } else {
+            user += " " + std::string(argv[i]);
+        }
+
+        return 0;
+    }
+
     int parse(int argc, const char ** argv) {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
-                use_jinja = true;
-            } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0){
-                if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
-                    return 1;
-                }
-                use_jinja = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
+            int ret = parse_options_with_value(argc, argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            ret = parse_options(argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            if (parse_positional_args(argv, i, positional_args_i)) {
+                return 1;
             }
         }
 
-        if (model_.empty()){
+        if (model_.empty()) {
             return 1;
         }
 
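The extracted helpers share a small return-code contract that the parse() loop dispatches on: 0 means the argument was handled (continue), 1 means a hard error (abort parsing), and 2 means "not mine", so control falls through to the next parser and finally to the positional-argument handler. handle_option_with_value itself is defined elsewhere in run.cpp and is not part of this diff; a minimal sketch of an int overload, assuming it simply consumes the argv entry that follows the flag (the helper is also used with float and std::string values):

    // Hypothetical sketch, not the actual run.cpp implementation:
    // consume the value following a flag, or return 1 when the flag
    // is the last argument. Needs <cstdlib> for std::atoi.
    int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
        if (i + 1 >= argc) {
            return 1; // flag given without a value
        }

        option_value = std::atoi(argv[++i]); // advance i past the consumed value
        return 0;
    }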
@@ -232,6 +275,8 @@ class Opt {
             "      Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
             "      Temperature (default: %.1f)\n"
+            "  -t, --threads <value>\n"
+            "      Number of threads to use during generation (default: %d)\n"
             "  -v, --verbose, --log-verbose\n"
             "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             "  -h, --help\n"
@@ -260,7 +305,7 @@ class Opt {
             "  llama-run file://some-file3.gguf\n"
             "  llama-run --ngl 999 some-file4.gguf\n"
             "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
+            context_size_default, ngl_default, temperature_default, n_threads_default);
     }
 };
 