Skip to content

support cpu count > 64 issue6142 #6197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ python/setup.py

# Xmake
.xmake/
CMakePresets.json
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ option(NCNN_PYTHON "build python api" OFF)
option(NCNN_INT8 "int8 inference" ON)
option(NCNN_BF16 "bf16 inference" ON)
option(NCNN_FORCE_INLINE "force inline some function" ON)
option(NCNN_MUTITHREAD "enable multi thread bata" ON)

if(ANDROID OR IOS OR NCNN_SIMPLESTL)
option(NCNN_DISABLE_RTTI "disable rtti" ON)
Expand Down
12 changes: 12 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ if(ANDROID)
list(APPEND ncnn_SRCS mat_pixel_android.cpp)
endif()

if(NCNN_MUTITHREAD)
list(APPEND ncnn_SRCS thread.cpp)
if(WIN32)
list(APPEND ncnn_SRCS ThreadInfo.cpp)
endif()
endif()

ncnn_src_group(ncnn_SRCS "sources")

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}")
Expand Down Expand Up @@ -266,6 +273,11 @@ if(NCNN_THREADS)
target_link_libraries(ncnn PUBLIC pthread)
endif()
endif()
if(NCNN_MUTITHREAD)
if(NOT WIN32 AND (NOT NCNN_SIMPLEOMP) AND (NOT NCNN_SIMPLESTL))
target_link_libraries(ncnn PUBLIC -pthread)
endif()
endif()

if(NCNN_VULKAN)
if(NCNN_SIMPLEVK)
Expand Down
69 changes: 69 additions & 0 deletions src/TheadInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#ifdef NCNN_MUTITHREAD
#ifdef _WIN32

#include "TheadInfo.h"
namespace ncnn {

// 初始化静态成员
ThreadInfo* ThreadInfo::thread_info = nullptr;

ThreadInfo::ThreadInfo(/* args */)
{
int groupCount = GetActiveProcessorGroupCount();
for (WORD group = 0; group < groupCount; group++)
{
DWORD processorsInGroup = GetActiveProcessorCount(group);
for (int i = 0; i < static_cast<int>(processorsInGroup); i++)
{
CoreInfo info;
info.group = group;
info.id = i + core_infos.size();
info.affinity = (static_cast<DWORD_PTR>(1) << i);
core_infos.push_back(info);
}
}
}

ThreadInfo* ThreadInfo::get()
{
static Mutex lock;
AutoLock guard(lock);

if (!thread_info)
{
thread_info = new ThreadInfo();
}
return thread_info;
}

CoreInfo ThreadInfo::getCurrentCore()
{
// 获取当前线程运行的CPU核心(支持多处理器组)
DWORD_PTR process_affinity, system_affinity;
GetProcessAffinityMask(GetCurrentProcess(), &process_affinity, &system_affinity);

// 使用扩展API获取处理器组信息
PROCESSOR_NUMBER proc_num;
GetCurrentProcessorNumberEx(&proc_num);

for (const auto& core : core_infos)
{
// 匹配组号和组内核心编号
if (core.group == proc_num.Group && (core.affinity & (1ULL << proc_num.Number)))
{
return core;
}
}

// 未找到时返回默认值
return {-1, -1, 0};
}

void ThreadInfo::getAllCore(std::vector<CoreInfo>& out)
{
out = core_infos;
}
} // namespace ncnn

#endif
#endif
30 changes: 30 additions & 0 deletions src/TheadInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef THREAD_INFO_H
#define THREAD_INFO_H
#ifdef NCNN_MUTITHREAD
#if defined _WIN32
#include "cpu.h"
namespace ncnn {
struct CoreInfo
{
public:
int id;
int group;
DWORD_PTR affinity;
};
class TheadInfo
{
private:
static ThreadInfo* thread_info;
std::vector<CoreInfo> core_infos;
TheadInfo(/* args */);

public:
static ThreadInfo* get();
CoreInfo getCurrentCore();
void getAllCore(std::vector<CoreInfo>& out);
};
} // namespace ncnn

#endif
#endif
#endif
34 changes: 34 additions & 0 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3266,4 +3266,38 @@ int set_flush_denormals(int flush_denormals)
#endif
}

int get_multi_thread_batch()
{
#if defined(_NCNN_MUTITHREAD)
#if defined _WIN32
DWORD length = 0;
GetLogicalProcessorInformation(NULL, &length);
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return 0;

PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);

int count = 0;
if (GetLogicalProcessorInformation(buffer, &length))
{
DWORD offset = 0;
while (offset < length)
{
if (buffer->Relationship == RelationProcessorCore)
count++;

offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
buffer++;
}
}
free(buffer);
return count;
#else
return get_cpu_count();
#endif
#else
return get_cpu_count();
#endif
}

} // namespace ncnn
4 changes: 4 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0601 // Windows 7+
#include <windows.h>
#endif
#if defined __ANDROID__ || defined __linux__
Expand Down Expand Up @@ -172,6 +173,9 @@ NCNN_EXPORT void set_kmp_blocktime(int time_ms);
NCNN_EXPORT int get_flush_denormals();
NCNN_EXPORT int set_flush_denormals(int flush_denormals);

// multi thread batch inference
NCNN_EXPORT int get_multi_thread_batch();

} // namespace ncnn

#endif // NCNN_CPU_H
5 changes: 5 additions & 0 deletions src/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ class NCNN_EXPORT Layer
// return 0 if success
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
/// @brief mutithread work function
/// @param workspace thread infomation
/// @return 0 if success
virtual int forward_thread(void* workspace);

#if NCNN_VULKAN
public:
Expand Down Expand Up @@ -139,6 +143,7 @@ class NCNN_EXPORT Layer
// layer factory function
typedef Layer* (*layer_creator_func)(void*);
typedef void (*layer_destroyer_func)(Layer*, void*);
typedef int (*layer_work_func)(Layer*, void*);

struct layer_registry_entry
{
Expand Down
54 changes: 54 additions & 0 deletions src/layer/absval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: BSD-3-Clause

#include "absval.h"
#include "thread.h"

namespace ncnn {

Expand All @@ -17,6 +18,16 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
if (opt.num_threads > 64)
{
ThreadWorkspace workspace;
workspace.layer = (Layer*)this;
MutilThread thread(workspace, opt);
std::vector<Mat> workspace_blobs;
workspace_blobs.push_back(bottom_top_blob);
thread.join(workspace_blobs);
return 0;
}

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
Expand All @@ -33,4 +44,47 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
return 0;
}

int AbsVal::forward_thread(void* workspace)
{
ThreadInfoExc* info = (ThreadInfoExc*)workspace;
Mat& bottom_top_blob = info->mats->at(0);
if (bottom_top_blob.elemsize == 1)
{
int8_t* ptr = (int8_t*)bottom_top_blob.data;
const int8_t flag = 1 << 7;
for (size_t i = info->start_index; i < info->end_index; i++)
{
if (ptr[i] & flag)
{
ptr[i] = -ptr[i];
}
}
}
else if (bottom_top_blob.elemsize == 2)
{
int16_t* ptr = (int16_t*)bottom_top_blob.data;
const int16_t flag = 1 << 15;
for (size_t i = info->start_index; i < info->end_index; i++)
{
if (ptr[i] & flag)
{
ptr[i] = -ptr[i];
}
}
}
else
{
float* ptr = (float*)bottom_top_blob.data;
for (size_t i = info->start_index; i < info->end_index; i++)
{
if (ptr[i] < 0)
{
ptr[i] = -ptr[i];
}
}
}

return 0;
}

} // namespace ncnn
1 change: 1 addition & 0 deletions src/layer/absval.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class AbsVal : public Layer
AbsVal();

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
virtual int forward_thread(void* workspace);
};

} // namespace ncnn
Expand Down
2 changes: 2 additions & 0 deletions src/layer/batchnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class BatchNorm : public Layer

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

virtual int forward_thread(void* workspace);

public:
// param
int channels;
Expand Down
1 change: 1 addition & 0 deletions src/platform.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#cmakedefine01 NCNN_INT8
#cmakedefine01 NCNN_BF16
#cmakedefine01 NCNN_FORCE_INLINE
#cmakedefine01 NCNN_MUTITHREAD

#cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@"

Expand Down
Loading