37
37
#include < stdio.h>
38
38
#include < vector>
39
39
#include < filesystem>
40
+ #include < algorithm>
41
+ #include < thread>
40
42
41
43
namespace fs = std::filesystem;
42
44
@@ -150,12 +152,14 @@ struct rpc_server_params {
150
152
int port = 50052 ;
151
153
size_t backend_mem = 0 ;
152
154
bool use_cache = false ;
155
+ int n_threads = std::max(1U , std::thread::hardware_concurrency()/2 );
153
156
};
154
157
155
158
static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
156
159
fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
157
160
fprintf (stderr, " options:\n " );
158
161
fprintf (stderr, " -h, --help show this help message and exit\n " );
162
+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
159
163
fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
160
164
fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
161
165
fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -172,6 +176,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172
176
return false ;
173
177
}
174
178
params.host = argv[i];
179
+ } else if (arg == " -t" || arg == " --threads" ) {
180
+ if (++i >= argc) {
181
+ return false ;
182
+ }
183
+ params.n_threads = std::stoi (argv[i]);
184
+ if (params.n_threads <= 0 ) {
185
+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
186
+ return false ;
187
+ }
175
188
} else if (arg == " -p" || arg == " --port" ) {
176
189
if (++i >= argc) {
177
190
return false ;
@@ -199,7 +212,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199
212
return true ;
200
213
}
201
214
202
- static ggml_backend_t create_backend () {
215
+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
203
216
ggml_backend_t backend = NULL ;
204
217
#ifdef GGML_USE_CUDA
205
218
fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -231,6 +244,7 @@ static ggml_backend_t create_backend() {
231
244
if (!backend) {
232
245
fprintf (stderr, " %s: using CPU backend\n " , __func__);
233
246
backend = ggml_backend_cpu_init ();
247
+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234
248
}
235
249
return backend;
236
250
}
@@ -275,7 +289,7 @@ int main(int argc, char * argv[]) {
275
289
fprintf (stderr, " \n " );
276
290
}
277
291
278
- ggml_backend_t backend = create_backend ();
292
+ ggml_backend_t backend = create_backend (params );
279
293
if (!backend) {
280
294
fprintf (stderr, " Failed to create backend\n " );
281
295
return 1 ;
0 commit comments