@@ -79,6 +79,7 @@ class Opt {
         ctx_params           = llama_context_default_params();
         model_params         = llama_model_default_params();
         context_size_default = ctx_params.n_batch;
+        n_threads_default    = ctx_params.n_threads;
         ngl_default          = model_params.n_gpu_layers;
         common_params_sampling sampling;
         temperature_default = sampling.temp;
@@ -104,6 +105,7 @@ class Opt {
         ctx_params.n_batch        = context_size >= 0 ? context_size : context_size_default;
         ctx_params.n_ctx          = ctx_params.n_batch;
+        ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
         model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
         temperature               = temperature >= 0 ? temperature : temperature_default;
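Like the existing options, the new thread count uses -1 as a "not set" sentinel: the constructor records the library default from `llama_context_default_params()`, and the user's value wins only when it is non-negative. Note that the single flag drives both `n_threads` (generation) and `n_threads_batch` (prompt processing). A minimal sketch of the resolution pattern, with a hypothetical helper name that is not part of the patch:

```cpp
// Sentinel-default resolution, mirroring the ternaries in the hunk above:
// a negative value means "the user never set this on the command line".
static int resolve(int user_value, int library_default) {
    return user_value >= 0 ? user_value : library_default;
}
```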
@@ -116,12 +118,12 @@ class Opt {
     std::string chat_template_file;
     std::string user;
     bool        use_jinja    = false;
-    int         context_size = -1, ngl = -1;
+    int         context_size = -1, ngl = -1, n_threads = -1;
     float       temperature  = -1;
     bool        verbose      = false;

   private:
-    int   context_size_default = -1, ngl_default = -1;
+    int   context_size_default = -1, ngl_default = -1, n_threads_default = -1;
     float temperature_default = -1;
     bool  help                = false;
@@ -159,53 +161,94 @@ class Opt {
         return 0;
     }

+    int parse_options_with_value(int argc, const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
+            if (handle_option_with_value(argc, argv, i, context_size) == 1) {
+                return 1;
+            }
+        } else if (options_parsing &&
+                   (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
+            if (handle_option_with_value(argc, argv, i, ngl) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
+            if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
+            if (handle_option_with_value(argc, argv, i, temperature) == 1) {
+                return 1;
+            }
+        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
+            if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
+                return 1;
+            }
+            use_jinja = true;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_options(const char ** argv, int & i, bool & options_parsing) {
+        if (options_parsing && (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
+            verbose = true;
+        } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
+            use_jinja = true;
+        } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
+            help = true;
+            return 0;
+        } else if (options_parsing && strcmp(argv[i], "--") == 0) {
+            options_parsing = false;
+        } else {
+            return 2;
+        }
+
+        return 0;
+    }
+
+    int parse_positional_args(const char ** argv, int & i, int & positional_args_i) {
+        if (positional_args_i == 0) {
+            if (!argv[i][0] || argv[i][0] == '-') {
+                return 1;
+            }
+
+            ++positional_args_i;
+            model_ = argv[i];
+        } else if (positional_args_i == 1) {
+            ++positional_args_i;
+            user = argv[i];
+        } else {
+            user += " " + std::string(argv[i]);
+        }
+
+        return 0;
+    }
+
     int parse(int argc, const char ** argv) {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
-                use_jinja = true;
-            } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0){
-                if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
-                    return 1;
-                }
-                use_jinja = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
+            int ret = parse_options_with_value(argc, argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            ret = parse_options(argv, i, options_parsing);
+            if (ret == 0) {
+                continue;
+            } else if (ret == 1) {
+                return ret;
+            }
+
+            if (parse_positional_args(argv, i, positional_args_i)) {
+                return 1;
             }
         }

-        if (model_.empty()){
+        if (model_.empty()) {
             return 1;
         }
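The refactor splits the old monolithic if/else ladder into three helpers that share an informal return-code protocol: 0 means the argument was consumed, 1 means a hard parse error that aborts, and 2 means "not recognized by this stage, try the next one". A self-contained sketch of that dispatch chain under those assumptions (hypothetical names, not the llama.cpp code itself):

```cpp
#include <cstring>
#include <string>

// 0 = consumed, 1 = error, 2 = not recognized by this parser stage.
static int parse_known_flag(const char * arg, bool & verbose) {
    if (std::strcmp(arg, "-v") == 0) {
        verbose = true;
        return 0;  // consumed
    }
    return 2;      // not mine; let the next stage inspect it
}

static int parse_positional(const char * arg, std::string & model) {
    if (!arg[0] || arg[0] == '-') {
        return 1;  // looks like an unknown flag: hard error
    }
    model = arg;   // first positional argument is the model
    return 0;
}

static int parse_all(int argc, const char ** argv, bool & verbose, std::string & model) {
    for (int i = 1; i < argc; ++i) {
        int ret = parse_known_flag(argv[i], verbose);
        if (ret == 0) { continue; }    // handled, move to the next argument
        if (ret == 1) { return ret; }  // propagate the error
        // ret == 2: fall through to the positional handler
        if (parse_positional(argv[i], model)) {
            return 1;
        }
    }
    return model.empty() ? 1 : 0;      // a model path is required, as in the patch
}
```

Keeping `i` and `options_parsing` as reference parameters in the real helpers lets value-taking options advance the index and lets `--` switch off option parsing from inside a helper.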
@@ -232,6 +275,8 @@ class Opt {
             "      Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
             "      Temperature (default: %.1f)\n"
+            "  -t, --threads <value>\n"
+            "      Number of threads to use during generation (default: %d)\n"
             "  -v, --verbose, --log-verbose\n"
             "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             "  -h, --help\n"
@@ -260,7 +305,7 @@ class Opt {
             "  llama-run file://some-file3.gguf\n"
             "  llama-run --ngl 999 some-file4.gguf\n"
             "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
+            context_size_default, ngl_default, temperature_default, n_threads_default);
     }
 };
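With the patch applied, the thread count is set like any other option; an illustrative invocation (model file name hypothetical):

    llama-run --threads 4 some-file.gguf Hello World

Omitting `-t/--threads` leaves the value at the default reported by `llama_context_default_params()`, which is what the new `%d` in the help text prints.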