Skip to content

Move extension/fb/threadpool into backends/xnnpack/threadpool #55

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/qnnpack/QNNPackBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <executorch/backends/qnnpack/executor/QNNExecutor.h>
#include <executorch/backends/qnnpack/qnnpack_schema_generated.h>
#include <executorch/backends/qnnpack/utils/utils.h>
#include <executorch/extension/fb/threadpool/threadpool.h>
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#include <executorch/runtime/backend/backend_registry.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>
Expand Down
2 changes: 1 addition & 1 deletion backends/qnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def define_common_targets():
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/runtime/backend:backend_registry",
"//executorch/extension/fb/threadpool:threadpool",
"//executorch/backends/xnnpack/threadpool:threadpool",
"//executorch/util:memory_utils",
"//{prefix}caffe2/aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack".format(
prefix = (
Expand Down
2 changes: 1 addition & 1 deletion backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
*/

#include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#include <executorch/backends/xnnpack/xnnpack_schema_generated.h>
#include <executorch/extension/fb/threadpool/threadpool.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <unordered_map>

Expand Down
2 changes: 1 addition & 1 deletion backends/xnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def define_common_targets():
":xnnpack_schema",
"//executorch/runtime/backend:backend_registry",
"//executorch/backends/qnnpack:qnnpack_utils", # TODO Use (1) portable for choose_qparams(), (2) xnnpack for quantize_per_tensor()
"//executorch/extension/fb/threadpool:threadpool",
"//executorch/backends/xnnpack/threadpool:threadpool",
"//executorch/util:memory_utils",
"//executorch/runtime/core/exec_aten/util:tensor_util",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#include <TargetConditionals.h>
#endif /* __APPLE__ */

#if (defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#if (defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_MAC) && TARGET_OS_MAC
#include <arm/mach/init.c>
#endif /* (defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE */
#endif /* (defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_MAC) && TARGET_OS_MAC */
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"(defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)": [
"arm/android/properties.c",
],
"(defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE": [
"(defined(__arm__) || defined(__aarch64__)) && defined(TARGET_OS_MAC) && TARGET_OS_MAC": [
"arm/mach/init.c",
],}

Expand Down
6 changes: 6 additions & 0 deletions backends/xnnpack/threadpool/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load(":targets.bzl", "define_common_targets")

define_common_targets()
42 changes: 42 additions & 0 deletions backends/xnnpack/threadpool/targets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
load("@fbsource//xplat/executorch/backends/xnnpack/third-party:third_party_libs.bzl", "third_party_dep")
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.

    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    # Internal-only sources are appended in non-OSS builds; OSS builds ship
    # just the core threadpool implementation.
    _THREADPOOL_SRCS = [
        "threadpool.cpp",
        "threadpool_guard.cpp",
    ] + (["fb/threadpool_use_n_threads.cpp"] if not runtime.is_oss else [])

    _THREADPOOL_HEADERS = [
        "threadpool.h",
        "threadpool_guard.h",
    ] + (["fb/threadpool_use_n_threads.h"] if not runtime.is_oss else [])

    runtime.cxx_library(
        name = "threadpool",
        srcs = _THREADPOOL_SRCS,
        deps = [
            "//executorch/runtime/core:core",
        ],
        exported_headers = _THREADPOOL_HEADERS,
        exported_deps = [
            # pthreadpool is the underlying thread-pool implementation; it is
            # exported because the public headers reference its types.
            third_party_dep("pthreadpool"),
        ],
        external_deps = ["cpuinfo"],
        # Exported so dependents compile with threadpool support enabled.
        exported_preprocessor_flags = [
            "-DET_USE_THREADPOOL",
        ],
        # NOTE(review): visibility lists //executorch/extension/threadpool/test/...
        # while this library lives under backends/xnnpack — confirm intended path.
        visibility = [
            "//executorch/...",
            "//executorch/backends/...",
            "//executorch/runtime/backend/...",
            "//executorch/extension/threadpool/test/...",
            "@EXECUTORCH_CLIENTS",
        ],
    )
8 changes: 8 additions & 0 deletions backends/xnnpack/threadpool/test/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()
20 changes: 20 additions & 0 deletions backends/xnnpack/threadpool/test/targets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.

    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    # Internal-only test sources are appended outside of OSS builds.
    _THREADPOOL_TESTS = [
        "threadpool_test.cpp",
    ] + (["fb/threadpool_use_n_threads_test.cpp"] if not runtime.is_oss else [])

    runtime.cxx_test(
        name = "threadpool_test",
        srcs = _THREADPOOL_TESTS,
        deps = [
            "//executorch/backends/xnnpack/threadpool:threadpool",
        ],
    )
188 changes: 188 additions & 0 deletions backends/xnnpack/threadpool/test/threadpool_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <gtest/gtest.h>
#include <mutex>
#include <numeric>
#include <random>

#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#include <executorch/backends/xnnpack/threadpool/threadpool_guard.h>

using namespace ::testing;

namespace {

// Integer ceiling division: the smallest q such that q * divisor >= dividend.
//
// dividend: numerator (the original parameter spelling "divident" was a typo).
// divisor: must be nonzero.
//
// Uses div/mod rather than (dividend + divisor - 1) / divisor so the sum
// cannot wrap around for dividends near SIZE_MAX.
size_t div_round_up(const size_t dividend, const size_t divisor) {
  return dividend / divisor + (dividend % divisor != 0 ? 1 : 0);
}

// Resizes `a` to `size` elements and fills it with values drawn uniformly at
// random from [1, size * 2]. Seeded from std::random_device, so contents
// differ between calls.
void resize_and_fill_vector(std::vector<int32_t>& a, const size_t size) {
  std::random_device entropy_source;
  std::mt19937 rng(entropy_source());
  std::uniform_int_distribution<> value_dist(1, size * 2);
  a.resize(size);
  for (auto& element : a) {
    element = value_dist(rng);
  }
}

// Prepares inputs for the parallel-add test: a, b, and c receive random data
// (c's contents are scratch to be overwritten by the test), while c_ref is
// filled with the serially computed elementwise sum a + b.
void generate_add_test_inputs(
    std::vector<int32_t>& a,
    std::vector<int32_t>& b,
    std::vector<int32_t>& c_ref,
    std::vector<int32_t>& c,
    size_t vector_size) {
  resize_and_fill_vector(a, vector_size);
  resize_and_fill_vector(b, vector_size);
  resize_and_fill_vector(c, vector_size);
  resize_and_fill_vector(c_ref, vector_size);
  const size_t count = a.size();
  for (size_t idx = 0; idx < count; ++idx) {
    c_ref[idx] = a[idx] + b[idx];
  }
}

// Prepares inputs for the parallel-reduce test: a receives random data and
// c_ref is set to its serially computed sum.
void generate_reduce_test_inputs(
    std::vector<int32_t>& a,
    int32_t& c_ref,
    size_t vector_size) {
  resize_and_fill_vector(a, vector_size);
  c_ref = std::accumulate(a.begin(), a.end(), 0);
}

// Dispatches `f` to the threadpool once per grain: task i is expected to
// cover indices [i * grain_size, min((i + 1) * grain_size, range)).
//
// f: per-grain worker, invoked with the grain index.
// range: total number of elements to cover.
// grain_size: number of elements each task handles; the last grain may be
//     partial, which the worker lambdas in this file handle by clamping
//     their end index.
void run_lambda_with_size(
    std::function<void(size_t)> f,
    size_t range,
    size_t grain_size) {
  const size_t num_grains = div_round_up(range, grain_size);

  auto threadpool = torch::executorch::threadpool::get_threadpool();
  // Previously this dispatched `range` tasks, leaving `num_grains` unused:
  // tasks with index >= num_grains had a start index past the end of the
  // data and did nothing. One task per grain is the intent.
  threadpool->run(f, num_grains);
}
} // namespace

// Verifies that an elementwise add partitioned into fixed-size grains and run
// on the threadpool matches a serially computed reference, across several
// vector-size / grain-size combinations, including sizes that do not divide
// evenly into grains.
TEST(ThreadPoolTest, ParallelAdd) {
  std::vector<int32_t> a, b, c, c_ref;
  size_t vector_size = 100;
  size_t grain_size = 10;

  // Captures vector_size and grain_size by reference so the same lambda
  // observes the updated values in the later rounds below.
  auto add_lambda = [&](size_t i) {
    size_t start_index = i * grain_size;
    size_t end_index = start_index + grain_size;
    end_index = std::min(end_index, vector_size);
    for (size_t j = start_index; j < end_index; ++j) {
      c[j] = a[j] + b[j];
    }
  };

  auto threadpool = torch::executorch::threadpool::get_threadpool();
  // NOTE(review): assumes a multi-core host; on a single-core machine this
  // expectation would fail.
  EXPECT_GT(threadpool->get_thread_count(), 1);

  generate_add_test_inputs(a, b, c_ref, c, vector_size);
  run_lambda_with_size(add_lambda, vector_size, grain_size);
  EXPECT_EQ(c, c_ref);

  // Try smaller grain size
  grain_size = 5;
  generate_add_test_inputs(a, b, c_ref, c, vector_size);
  run_lambda_with_size(add_lambda, vector_size, grain_size);
  EXPECT_EQ(c, c_ref);

  // Vector smaller than one grain: a single partial grain covers everything.
  vector_size = 7;
  generate_add_test_inputs(a, b, c_ref, c, vector_size);
  run_lambda_with_size(add_lambda, vector_size, grain_size);
  EXPECT_EQ(c, c_ref);

  // Size not divisible by grain size: last grain is partial.
  vector_size = 7;
  grain_size = 5;
  generate_add_test_inputs(a, b, c_ref, c, vector_size);
  run_lambda_with_size(add_lambda, vector_size, grain_size);
  EXPECT_EQ(c, c_ref);
}

// Test parallel reduction where we acquire lock within lambda
TEST(ThreadPoolTest, ParallelReduce) {
std::vector<int32_t> a;
int32_t c = 0, c_ref = 0;
size_t vector_size = 100;
size_t grain_size = 11;
std::mutex m;

auto reduce_lambda = [&](size_t i) {
size_t start_index = i * grain_size;
size_t end_index = start_index + grain_size;
end_index = std::min(end_index, vector_size);
std::lock_guard<std::mutex> lock(m);
for (size_t j = start_index; j < end_index; ++j) {
c += a[j];
}
};

auto threadpool = torch::executorch::threadpool::get_threadpool();
EXPECT_GT(threadpool->get_thread_count(), 1);

generate_reduce_test_inputs(a, c_ref, vector_size);
run_lambda_with_size(reduce_lambda, vector_size, grain_size);
EXPECT_EQ(c, c_ref);

vector_size = 7;
c = c_ref = 0;
generate_reduce_test_inputs(a, c_ref, vector_size);
run_lambda_with_size(reduce_lambda, vector_size, grain_size);
EXPECT_EQ(c, c_ref);
}

// Copied from
// caffe2/aten/src/ATen/test/test_thread_pool_guard.cpp
// NoThreadPoolGuard must make get_pthreadpool() return nullptr while any
// guard is alive (guards nest), and restore the previous value on scope exit.
TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
  auto threadpool_ptr = torch::executorch::threadpool::get_pthreadpool();

  ASSERT_NE(threadpool_ptr, nullptr);
  {
    torch::executorch::threadpool::NoThreadPoolGuard g1;
    auto threadpool_ptr1 = torch::executorch::threadpool::get_pthreadpool();
    ASSERT_EQ(threadpool_ptr1, nullptr);

    {
      // Nested guard: still suppressed.
      torch::executorch::threadpool::NoThreadPoolGuard g2;
      auto threadpool_ptr2 = torch::executorch::threadpool::get_pthreadpool();
      ASSERT_EQ(threadpool_ptr2, nullptr);
    }

    // Guard should restore prev value (nullptr)
    auto threadpool_ptr3 = torch::executorch::threadpool::get_pthreadpool();
    ASSERT_EQ(threadpool_ptr3, nullptr);
  }

  // Guard should restore prev value (pthreadpool_)
  auto threadpool_ptr4 = torch::executorch::threadpool::get_pthreadpool();
  ASSERT_NE(threadpool_ptr4, nullptr);
  ASSERT_EQ(threadpool_ptr4, threadpool_ptr);
}

// While a NoThreadPoolGuard is active, Threadpool::run() still executes every
// task (per the final sum check); the unguarded accumulation into a shared
// variable below relies on that serialized execution.
TEST(TestNoThreadPoolGuard, TestRunWithGuard) {
  const std::vector<int64_t> values = {1, 2, 3};

  auto pool = torch::executorch::threadpool::get_threadpool();
  int64_t accumulated = 0;
  {
    // Run on same thread
    torch::executorch::threadpool::NoThreadPoolGuard guard;
    auto accumulate_task = [&values, &accumulated](const size_t task_id) {
      accumulated += values[task_id];
    };
    pool->run(accumulate_task, 3);

    // confirm the guard is on
    ASSERT_EQ(torch::executorch::threadpool::get_pthreadpool(), nullptr);
  }
  // All three tasks ran exactly once: 1 + 2 + 3.
  ASSERT_EQ(accumulated, 6);
}
Loading