24
24
#endif
25
25
#include < string>
26
26
#include < stdio.h>
27
+ #include < algorithm>
28
+ #include < thread>
27
29
28
30
// Command-line configurable settings for the RPC server.
// Each member carries the default used when the matching option is not given.
struct rpc_server_params {
    // Address the server binds to (-H / --host).
    std::string host        = "127.0.0.1";
    // TCP port the server binds to (-p / --port).
    int         port        = 50052;
    // Backend memory size (-m / --mem); 0 is the "not specified" default.
    // NOTE(review): the help text describes the option in MB; the unit stored
    // here is decided by the parser, which is not visible in this view.
    size_t      backend_mem = 0;
    // Number of threads for the CPU backend (-t / --threads).
    // Defaults to half of the reported hardware threads, but never less than
    // one: hardware_concurrency() may return 0 when the value is unknown.
    int         n_threads   = static_cast<int>(std::max(1U, std::thread::hardware_concurrency() / 2));
};
33
36
34
37
static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
35
38
fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
36
39
fprintf (stderr, " options:\n " );
37
40
fprintf (stderr, " -h, --help show this help message and exit\n " );
41
+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
38
42
fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
39
43
fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
40
44
fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -50,6 +54,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
50
54
return false ;
51
55
}
52
56
params.host = argv[i];
57
+ } else if (arg == " -t" || arg == " --threads" ) {
58
+ if (++i >= argc) {
59
+ return false ;
60
+ }
61
+ params.n_threads = std::stoi (argv[i]);
62
+ if (params.n_threads <= 0 ) {
63
+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
64
+ return false ;
65
+ }
53
66
} else if (arg == " -p" || arg == " --port" ) {
54
67
if (++i >= argc) {
55
68
return false ;
@@ -75,7 +88,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
75
88
return true ;
76
89
}
77
90
78
- static ggml_backend_t create_backend () {
91
+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
79
92
ggml_backend_t backend = NULL ;
80
93
#ifdef GGML_USE_CUDA
81
94
fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -107,6 +120,7 @@ static ggml_backend_t create_backend() {
107
120
if (!backend) {
108
121
fprintf (stderr, " %s: using CPU backend\n " , __func__);
109
122
backend = ggml_backend_cpu_init ();
123
+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
110
124
}
111
125
return backend;
112
126
}
@@ -151,7 +165,7 @@ int main(int argc, char * argv[]) {
151
165
fprintf (stderr, " \n " );
152
166
}
153
167
154
- ggml_backend_t backend = create_backend ();
168
+ ggml_backend_t backend = create_backend (params );
155
169
if (!backend) {
156
170
fprintf (stderr, " Failed to create backend\n " );
157
171
return 1 ;
0 commit comments