Skip to content

Commit a5d30b1

Browse files
common : better default number of threads (#934)
* commit
* fix
* try-catch
* apply code review
* improve
* improve
* add macos headers
* done
* remove color
* fix windows
* minor
* fix
* Apply suggestions from code review

Co-authored-by: DannyDaemonic <[email protected]>

* remove
* minor
* minor

---------

Co-authored-by: jon-chuang <[email protected]>
Co-authored-by: DannyDaemonic <[email protected]>
1 parent 76a8849 commit a5d30b1

File tree

2 files changed

+42
-12
lines changed

2 files changed

+42
-12
lines changed

examples/common.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
#include "common.h"
22

33
#include <cassert>
4+
#include <iostream>
45
#include <cstring>
56
#include <fstream>
67
#include <string>
78
#include <iterator>
89
#include <algorithm>
910
#include <sstream>
10-
#include <iostream>
11+
12+
#if defined(__APPLE__) && defined(__MACH__)
13+
#include <sys/types.h>
14+
#include <sys/sysctl.h>
15+
#endif
1116

1217
#if defined (_WIN32)
1318
#include <fcntl.h>
@@ -25,19 +30,43 @@ extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int
2530
#define CP_UTF8 65001
2631
#endif
2732

28-
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
29-
// determine sensible default number of threads.
30-
// std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
33+
// Best-effort estimate of the number of physical CPU cores, used as the
// default thread count.
//
//  - Linux: parses the first usable "cpu cores : N" line of /proc/cpuinfo.
//    NOTE(review): that field is reported per physical package, so on
//    multi-socket machines this presumably undercounts - confirm if relevant.
//  - macOS: queries "hw.perflevel0.physicalcpu" first, then "hw.physicalcpu".
//  - Windows: not implemented yet; uses the generic fallback below.
//
// Fallback (also used when the platform query fails or yields an unusable
// value): std::thread::hardware_concurrency(), halved when it is above 4,
// or 4 when it reports 0.
//
// Never throws on malformed input and always returns a value >= 1.
int32_t get_num_physical_cores() {
#ifdef __linux__
    std::ifstream cpuinfo("/proc/cpuinfo");
    std::string line;
    while (std::getline(cpuinfo, line)) {
        std::size_t pos = line.find("cpu cores");
        if (pos == std::string::npos) {
            continue;
        }
        pos = line.find(": ", pos);
        if (pos == std::string::npos) {
            continue;
        }
        try {
            const unsigned long n_cores = std::stoul(line.substr(pos + 2));
            // Reject 0 and anything that would not fit the return type
            // (std::stoul silently wraps negative input to a huge value).
            if (n_cores > 0 && n_cores <= static_cast<unsigned long>(INT32_MAX)) {
                return static_cast<int32_t>(n_cores);
            }
        } catch (const std::invalid_argument &) {
            // Ignore if we could not parse
        } catch (const std::out_of_range &) {
            // Ignore values too large for unsigned long
        }
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores = 0;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0 && num_physical_cores > 0) {
        return num_physical_cores;
    }
    // len is an in/out parameter: restore it before issuing the second query.
    len = sizeof(num_physical_cores);
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0 && num_physical_cores > 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    //TODO: Implement
#endif
    const unsigned int n_threads = std::thread::hardware_concurrency();
    if (n_threads == 0) {
        // hardware_concurrency() could not determine the value
        return 4;
    }
    // Above 4 logical CPUs, use half of them as the physical-core estimate.
    return static_cast<int32_t>(n_threads <= 4 ? n_threads : n_threads / 2);
}
4068

69+
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
4170
bool invalid_param = false;
4271
std::string arg;
4372
gpt_params default_params;

examples/common.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313
//
1414
// CLI argument parsing
1515
//
16+
// Returns an estimate of the number of physical CPU cores; used as the
// default value of gpt_params::n_threads.
int32_t get_num_physical_cores();
1617

1718
struct gpt_params {
1819
int32_t seed = -1; // RNG seed
19-
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
20-
int32_t n_predict = -1; // new tokens to predict
20+
int32_t n_threads = get_num_physical_cores();
21+
int32_t n_predict = -1; // new tokens to predict
2122
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
2223
int32_t n_ctx = 512; // context size
2324
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)

0 commit comments

Comments
 (0)