
XNNPACK Buck Target #38


Closed · wants to merge 17 commits
15 changes: 15 additions & 0 deletions .gitmodules
@@ -10,3 +10,18 @@
[submodule "third-party/gflags"]
	path = third-party/gflags
	url = https://github.com/gflags/gflags.git
+[submodule "third-party/cpuinfo"]
+	path = third-party/cpuinfo
+	url = https://github.com/pytorch/cpuinfo.git
+[submodule "third-party/FP16"]
+	path = third-party/FP16
+	url = https://github.com/Maratyszcza/FP16.git
+[submodule "third-party/FXdiv"]
+	path = third-party/FXdiv
+	url = https://github.com/Maratyszcza/FXdiv.git
+[submodule "third-party/pthreadpool"]
+	path = third-party/pthreadpool
+	url = https://github.com/Maratyszcza/pthreadpool.git
+[submodule "third-party/XNNPACK"]
+	path = third-party/XNNPACK
+	url = https://github.com/google/XNNPACK.git
7 changes: 3 additions & 4 deletions backends/xnnpack/runtime/XNNCompiler.cpp
@@ -8,7 +8,6 @@

#include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
#include <executorch/backends/xnnpack/xnnpack_schema_generated.h>
-#include <executorch/extension/fb/threadpool/threadpool.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <unordered_map>

@@ -91,7 +90,7 @@ int32_t. As a result, we need to static cast the shapes to size_t
*/
template <typename T = size_t>
std::vector<T> flatbufferDimsToVector(
-    const flatbuffers_fbsource::Vector<uint32_t>* fb_dims) {
+    const flatbuffers::Vector<uint32_t>* fb_dims) {
  std::vector<T> dims_data;
  dims_data.reserve(fb_dims->size());
  for (auto fb_dim : *fb_dims) {
@@ -1427,7 +1426,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
      fb_xnnpack::XNNGraphBufferHasIdentifier(buffer_pointer),
      DelegateInvalidCompatibility,
      "XNNPACK Delegate Serialization Format version identifier '%.4s' != expected '%.4s'",
-      flatbuffers_fbsource::GetBufferIdentifier(buffer_pointer),
+      flatbuffers::GetBufferIdentifier(buffer_pointer),
      fb_xnnpack::XNNGraphIdentifier());

  auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(buffer_pointer);
@@ -1489,7 +1488,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
  xnn_runtime_t runtime_ptr = nullptr;
  status = xnn_create_runtime_v2(
      subgraph.get(),
-      torch::executorch::threadpool::get_pthreadpool(),
+      nullptr,
      0,
      &runtime_ptr);
  ET_CHECK_OR_RETURN_ERROR(
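Note on the threadpool change above: passing `nullptr` as the second argument of `xnn_create_runtime_v2` is valid in the public XNNPACK API and means single-threaded execution on the calling thread, which is what lets this PR drop the `executorch/extension/fb/threadpool` dependency. A minimal hedged sketch against `xnnpack.h` (the helper name is illustrative, not the PR's code):

```cpp
#include <xnnpack.h>

// Sketch: create an XNNPACK runtime without a threadpool. A null
// pthreadpool_t argument makes XNNPACK run on the calling thread,
// so no external threadpool library is required.
xnn_status create_runtime_single_threaded(
    xnn_subgraph_t subgraph, xnn_runtime_t* runtime_out) {
  return xnn_create_runtime_v2(
      subgraph,
      /*threadpool=*/nullptr,
      /*flags=*/0,
      runtime_out);
}
```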
50 changes: 1 addition & 49 deletions backends/xnnpack/runtime/XNNExecutor.cpp
@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/

-#include <executorch/backends/qnnpack/utils/utils.h>
#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>

namespace torch {
@@ -15,54 +14,7 @@ namespace xnnpack {
namespace delegate {

Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
-  auto qinput_pair = qinputs_.find(id);
-  if (qinput_pair != qinputs_.end()) {
-    auto qinput = qinput_pair->second;
-    // dq the input and copy it in to qinput
-    float input_min, input_max;
-    std::tie(input_min, input_max) = qnnpack_utils::GetMinMax(*input);
-
-    qnnpack_utils::QuantizationParams input_qparam;
-
-    int8_t qmin = std::numeric_limits<int8_t>::min();
-    int8_t qmax = std::numeric_limits<int8_t>::max();
-    Error e = qnnpack_utils::ChooseQuantizationParams(
-        input_min,
-        input_max,
-        qmin,
-        qmax,
-        input_qparam,
-        false, /* preserve_sparsity */
-        false, /* force_scale_power_of_two */
-        false /* reduce_range */
-    );
-    ET_CHECK_OR_RETURN_ERROR(
-        e == Error::Ok, Internal, "ChooseQuantizationParams() failed");
-
-    ET_CHECK_OR_RETURN_ERROR(
-        input_qparam.zero_point <= qmax && input_qparam.zero_point >= qmin,
-        Internal,
-        "ChooseQuantizationParams() selected invalid input_zero_point: %d",
-        input_qparam.zero_point);
-
-    e = qnnpack_utils::QuantizePerTensor<int8_t>(
-        *input, qinput, input_qparam.scale, input_qparam.zero_point);
-
-    size_t batch_size = 1;
-    for (int i = 0; i < input->dim() - 1; i++) {
-      batch_size *= input->size(i);
-    }
-    ET_CHECK_OR_RETURN_ERROR(
-        e == Error::Ok, Internal, "QuantizePerTensor() failed");
-    externals_.emplace_back(xnn_external_value{
-        id,
-        qinput.data_ptr(),
-        {static_cast<float>(input_qparam.scale),
-         static_cast<int8_t>(input_qparam.zero_point)},
-        batch_size});
-  } else {
-    externals_.emplace_back(xnn_external_value{id, input->data_ptr()});
-  }
+  externals_.emplace_back(xnn_external_value{id, input->data_ptr()});
  return Error::Ok;
}

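With the QNNPACK requantization path removed, `set_external_input` now just registers the tensor's raw data pointer as an `xnn_external_value`. For context, a hedged sketch of how such external values are consumed by the public XNNPACK runtime API (the helper and variable names are illustrative, not the PR's code):

```cpp
#include <xnnpack.h>

#include <vector>

// Sketch: hand input/output buffers to an XNNPACK runtime as external
// values, then run it. Each xnn_external_value pairs a value ID from the
// subgraph with a raw data pointer; no quantization happens here.
xnn_status run_with_externals(
    xnn_runtime_t runtime,
    uint32_t input_id, void* input_data,
    uint32_t output_id, void* output_data) {
  std::vector<xnn_external_value> externals = {
      {input_id, input_data},
      {output_id, output_data},
  };
  xnn_status status =
      xnn_setup_runtime(runtime, externals.size(), externals.data());
  if (status != xnn_status_success) {
    return status;
  }
  return xnn_invoke_runtime(runtime);
}
```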
6 changes: 2 additions & 4 deletions backends/xnnpack/serialization/xnnpack_graph_serialize.py
@@ -8,11 +8,9 @@
import os
import tempfile

-# pyre-ignore[21]: Could not find module `executorch.exir.serialize.bindings`.
-import executorch.exir.serialize.bindings as bindings  # @manual=//executorch/exir/serialize:bindings
-
import pkg_resources

+from executorch.exir.serialize._flatbuffer import _flatc_compile
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
from executorch.exir.serialize._dataclass import _DataclassEncoder

@@ -27,7 +25,7 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
        with open(json_path, "wb") as json_file:
            json_file.write(xnnpack_graph_json.encode("ascii"))
        # pyre-ignore
-        bindings.flatc_compile(d, schema_path, json_path)
+        _flatc_compile(d, schema_path, json_path)
        output_path = os.path.join(d, "schema.bin")
        with open(output_path, "rb") as output_file:
            return output_file.read()
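The bytes produced by `convert_to_flatbuffer` are what `XNNCompiler::compileModel` later verifies against the `fb_xnnpack` file identifier. A hedged C++ sketch of that identifier check using the generic flatbuffers API (the helper is illustrative; in the delegate the `expected` argument would be `fb_xnnpack::XNNGraphIdentifier()`):

```cpp
#include <flatbuffers/flatbuffers.h>

#include <cstdio>

// Sketch: check the 4-character flatbuffer file identifier before parsing.
// On mismatch, print both identifiers, mirroring the error message in
// XNNCompiler::compileModel above.
bool buffer_matches_schema(const void* buffer_pointer, const char* expected) {
  if (!flatbuffers::BufferHasIdentifier(buffer_pointer, expected)) {
    std::fprintf(
        stderr,
        "identifier '%.4s' != expected '%.4s'\n",
        flatbuffers::GetBufferIdentifier(buffer_pointer),
        expected);
    return false;
  }
  return true;
}
```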
10 changes: 3 additions & 7 deletions backends/xnnpack/targets.bzl
@@ -14,7 +14,7 @@ def define_common_targets():
"xnnpack_schema_generated.h": ["schema_generated.h"],
},
cmd = " ".join([
"$(exe fbsource//third-party/flatbuffers/fbsource_namespace:flatc)",
"$(exe {})".format(runtime.external_dep_location("flatc")),
"--cpp",
"--cpp-std c++11",
"--scoped-enums",
@@ -30,9 +30,7 @@ def define_common_targets():
        exported_headers = {
            "xnnpack_schema_generated.h": ":gen_xnnpack_schema[xnnpack_schema_generated.h]",
        },
-        exported_deps = [
-            "fbsource//third-party/flatbuffers/fbsource_namespace:flatbuffers-api",
-        ],
+        exported_external_deps = ["flatbuffers-api"],
    )

runtime.cxx_library(
@@ -51,11 +49,9 @@
"@EXECUTORCH_CLIENTS",
],
deps = [
"//xplat/third-party/XNNPACK:XNNPACK",
"//executorch/third-party:XNNPACK",
":xnnpack_schema",
"//executorch/runtime/backend:backend_registry",
"//executorch/backends/qnnpack:qnnpack_utils", # TODO Use (1) portable for choose_qparams(), (2) xnnpack for quantize_per_tensor()
"//executorch/extension/fb/threadpool:threadpool",
"//executorch/util:memory_utils",
"//executorch/runtime/core/exec_aten/util:tensor_util",
],
22 changes: 22 additions & 0 deletions backends/xnnpack/utils/xnnpack_constants.py
@@ -0,0 +1,22 @@
UINT32_MAX = 4294967295
XNN_EXTRA_BYTES = 16
XNN_MAX_TENSOR_DIMS = 6
XNN_FLAG_SPARSE_INFERENCE = 0x00000001
XNN_FLAG_HINT_SPARSE_INFERENCE = XNN_FLAG_SPARSE_INFERENCE
XNN_FLAG_FP16_INFERENCE = 0x00000002
XNN_FLAG_HINT_FP16_INFERENCE = XNN_FLAG_FP16_INFERENCE
XNN_FLAG_FORCE_FP16_INFERENCE = 0x00000004
XNN_FLAG_BASIC_PROFILING = 0x00000008
XNN_FLAG_DEPTHWISE_CONVOLUTION = 0x00000001
XNN_FLAG_TRANSPOSE_WEIGHTS = 0x00000001
XNN_FLAG_INPUT_NHWC = 0x00000002
XNN_FLAG_TENSORFLOW_SAME_PADDING = 0x00000004
XNN_FLAG_TENSORFLOW_RESHAPE_2D = 0x00000004
XNN_FLAG_TENSORFLOW_LEGACY_MODE = 0x00000004
XNN_FLAG_FP32_STATIC_WEIGHTS = 0x00000008
XNN_FLAG_ALIGN_CORNERS = 0x00000008
XNN_FLAG_YIELD_WORKERS = 0x00000010
XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001
XNN_VALUE_FLAG_EXTERNAL_OUTPUT = 0x00000002
XNN_VALUE_FLAG_PERSISTENT = 0x00000004
XNN_INVALID_VALUE_ID = UINT32_MAX
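These Python values mirror the flag constants in XNNPACK's public `xnnpack.h` header, so the Python serializer and the C++ runtime agree on their meaning. A hedged sketch of where one of them lands on the C++ side (`xnn_define_tensor_value` and `XNN_VALUE_FLAG_EXTERNAL_INPUT` are the public API; the helper itself is illustrative):

```cpp
#include <xnnpack.h>

// Sketch: define a subgraph tensor marked as an external input. The flag
// passed here is the C-side counterpart of the Python constant
// XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001 above.
xnn_status define_external_input(
    xnn_subgraph_t subgraph,
    const size_t* dims,
    size_t num_dims,
    uint32_t external_id,
    uint32_t* id_out) {
  return xnn_define_tensor_value(
      subgraph,
      xnn_datatype_fp32,
      num_dims,
      dims,
      /*data=*/nullptr,  // external inputs carry no static data
      external_id,
      XNN_VALUE_FLAG_EXTERNAL_INPUT,
      id_out);
}
```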
1 change: 1 addition & 0 deletions examples/executor_runner/targets.bzl
@@ -13,6 +13,7 @@ def define_common_targets():
        srcs = ["executor_runner.cpp"],
        deps = [
            "//executorch/runtime/executor/test:test_backend_compiler_lib",
+            "//executorch/backends/xnnpack:xnnpack_backend",
            "//executorch/runtime/executor:program",
            "//executorch/extension/data_loader:file_data_loader",
            "//executorch/util:util",
117 changes: 117 additions & 0 deletions examples/export/export_xnnpack_example.py
@@ -0,0 +1,117 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Example script for exporting simple models to flatbuffer

import argparse

import executorch.exir as exir
import torch
from executorch.backends.backend_api import to_backend, validation_disabled
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
)

from ..models import MODEL_NAME_TO_MODEL



def export_add_module_with_lower_graph():
    """
    AddModule:

    input -> torch.add -> output

    This module can be lowered to the XNNPACK backend as a delegate:

    input -> [lowered module (delegate)] -> output
    """

    class AddModule(torch.nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, x):
            return x + x

    capture_config = exir.CaptureConfig(pt2_mode=True, enable_dynamic_shape=False)
    edge_compile_config = exir.EdgeCompileConfig()
    sample_inputs = (torch.ones(1, 2, 3, 4),)
    print("Running the example to export a module with a lowered graph...")
    edge = exir.capture(AddModule(), sample_inputs, capture_config).to_edge(
        edge_compile_config
    )
    print("Exported graph:\n", edge.exported_program.graph)

    # Lower AddModule to the XNNPACK backend
    print("Lowering to the XNNPACK backend...")
    edge.exported_program = to_backend(
        edge.exported_program, XnnpackFloatingPointPartitioner
    )

    # The graph module is still runnable
    edge.exported_program.graph_module(*sample_inputs)

    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer

    model_name = "xnnpack_add"
    filename = f"{model_name}.ff"
    print(f"Saving exported program to {filename}")
    with open(filename, "wb") as file:
        file.write(buffer)


def export_mv2_with_lower_graph():
    mv2, example_inputs = MODEL_NAME_TO_MODEL["mv2"]()
    mv2 = mv2.eval()
    capture_config = exir.CaptureConfig(pt2_mode=True, enable_dynamic_shape=False)
    edge_compile_config = exir.EdgeCompileConfig(_check_ir_validity=False)

    edge = exir.capture(mv2, example_inputs, capture_config).to_edge(
        edge_compile_config
    )
    with validation_disabled():
        edge.exported_program = to_backend(
            edge.exported_program, XnnpackFloatingPointPartitioner
        )

    edge.exported_program.graph_module(*example_inputs)
    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer

    model_name = "xnnpack_mv2"
    filename = f"{model_name}.ff"
    print(f"Saving exported program to {filename}")
    with open(filename, "wb") as file:
        file.write(buffer)


OPTIONS_TO_LOWER = {
    "add": export_add_module_with_lower_graph,
    "mv2": export_mv2_with_lower_graph,
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--option",
        required=True,
        choices=list(OPTIONS_TO_LOWER.keys()),
        help=f"Provide the flow name. Valid ones: {list(OPTIONS_TO_LOWER.keys())}",
    )

    args = parser.parse_args()

    # Choose one option
    option = OPTIONS_TO_LOWER[args.option]

    # Run the example flow
    option()
13 changes: 8 additions & 5 deletions shim/xplat/executorch/build/env_interface.bzl
@@ -113,18 +113,21 @@ def _patch_headers(kwargs):

    # header_namespace is to workaround the fact that all C++ source files are having the pattern:
    # `include <executorch/.../*.h>` but BUCK2 root is at executorch/ so the `executorch/` prefix is redundant.
-    kwargs["header_namespace"] = "executorch/" + native.package_name()
+    if not kwargs["name"] in ["FXdiv", "pthreadpool_header", "interface", "operators", "hot", "subgraph", "ukernels_scalar", "tables"]:  # HACK do not commit!!!!
+        kwargs["header_namespace"] = "executorch/" + native.package_name()
    return kwargs

def _patch_pp_flags(kwargs):
    return kwargs

def _patch_cxx_compiler_flags(kwargs):
"""Compiler flags to enable C++17 features."""
if "compiler_flags" in kwargs:
kwargs["compiler_flags"].append("-std=c++17")
"""CXX Compiler flags to enable C++17 features."""
if "lang_compiler_flags" not in kwargs:
kwargs["lang_compiler_flags"] = {"cxx_cpp_output": ["-std=c++17"]}
elif "cxx_cpp_output" not in kwargs["lang_compiler_flags"]:
kwargs["lang_compiler_flags"]["cxx_cpp_output"] = ["-std=c++17"]
else:
kwargs["compiler_flags"] = ["-std=c++17"]
kwargs["lang_compiler_flags"]["cxx_cpp_output"].append("-std=c++17")
return kwargs

# buildifier: disable=unused-variable
1 change: 1 addition & 0 deletions third-party/FP16
Submodule FP16 added at 4dfe08
1 change: 1 addition & 0 deletions third-party/FXdiv
Submodule FXdiv added at b40832
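The new cpuinfo, FP16, FXdiv, and pthreadpool submodules are XNNPACK's standard build-time dependencies. As one example, FP16 is a header-only half-precision conversion library; a hedged sketch of its use (assuming the upstream `fp16.h` API):

```cpp
#include <fp16.h>

#include <cstdint>
#include <cstdio>

int main() {
  // Round-trip a float through IEEE fp16, the kind of conversion
  // XNNPACK's half-precision inference paths rely on FP16 for.
  uint16_t half = fp16_ieee_from_fp32_value(3.14159f);
  float back = fp16_ieee_to_fp32_value(half);
  std::printf("fp16 round-trip: %f\n", back);
  return 0;
}
```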