
XNNPACK Buck Target #38


Closed · wants to merge 17 commits
15 changes: 15 additions & 0 deletions .gitmodules
@@ -10,3 +10,18 @@
[submodule "third-party/gflags"]
	path = third-party/gflags
	url = https://github.com/gflags/gflags.git
+[submodule "third-party/cpuinfo"]
+	path = third-party/cpuinfo
+	url = https://github.com/pytorch/cpuinfo.git
+[submodule "third-party/FP16"]
+	path = third-party/FP16
+	url = https://github.com/Maratyszcza/FP16.git
+[submodule "third-party/FXdiv"]
+	path = third-party/FXdiv
+	url = https://github.com/Maratyszcza/FXdiv.git
+[submodule "third-party/pthreadpool"]
+	path = third-party/pthreadpool
+	url = https://github.com/Maratyszcza/pthreadpool.git
+[submodule "third-party/XNNPACK"]
+	path = third-party/XNNPACK
+	url = https://github.com/google/XNNPACK.git
7 changes: 3 additions & 4 deletions backends/xnnpack/runtime/XNNCompiler.cpp
@@ -8,7 +8,6 @@

#include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
#include <executorch/backends/xnnpack/xnnpack_schema_generated.h>
-#include <executorch/extension/fb/threadpool/threadpool.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <unordered_map>

@@ -91,7 +90,7 @@ int32_t. As a result, we need to static cast the shapes to size_t
*/
template <typename T = size_t>
std::vector<T> flatbufferDimsToVector(
-    const flatbuffers_fbsource::Vector<uint32_t>* fb_dims) {
+    const flatbuffers::Vector<uint32_t>* fb_dims) {
  std::vector<T> dims_data;
  dims_data.reserve(fb_dims->size());
  for (auto fb_dim : *fb_dims) {
@@ -1427,7 +1426,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
      fb_xnnpack::XNNGraphBufferHasIdentifier(buffer_pointer),
      DelegateInvalidCompatibility,
      "XNNPACK Delegate Serialization Format version identifier '%.4s' != expected '%.4s'",
-      flatbuffers_fbsource::GetBufferIdentifier(buffer_pointer),
+      flatbuffers::GetBufferIdentifier(buffer_pointer),
      fb_xnnpack::XNNGraphIdentifier());

  auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(buffer_pointer);
@@ -1489,7 +1488,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
  xnn_runtime_t runtime_ptr = nullptr;
  status = xnn_create_runtime_v2(
      subgraph.get(),
-      torch::executorch::threadpool::get_pthreadpool(),
+      nullptr,
      0,
      &runtime_ptr);
  ET_CHECK_OR_RETURN_ERROR(
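Note on the threadpool change above: passing `nullptr` as the second argument of `xnn_create_runtime_v2` is valid in the public XNNPACK API and means single-threaded execution on the calling thread, which is what lets this PR drop the `executorch/extension/fb/threadpool` dependency. A minimal hedged sketch against `xnnpack.h` (the helper name is illustrative, not the PR's code):

```cpp
#include <xnnpack.h>

// Sketch: create an XNNPACK runtime without a threadpool. A null
// pthreadpool_t argument makes XNNPACK run on the calling thread,
// so no external threadpool library is required.
xnn_status create_runtime_single_threaded(
    xnn_subgraph_t subgraph, xnn_runtime_t* runtime_out) {
  return xnn_create_runtime_v2(
      subgraph,
      /*threadpool=*/nullptr,
      /*flags=*/0,
      runtime_out);
}
```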
50 changes: 1 addition & 49 deletions backends/xnnpack/runtime/XNNExecutor.cpp
@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/

-#include <executorch/backends/qnnpack/utils/utils.h>
#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>

namespace torch {
@@ -15,54 +14,7 @@ namespace xnnpack {
namespace delegate {

Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
-  auto qinput_pair = qinputs_.find(id);
-  if (qinput_pair != qinputs_.end()) {
-    auto qinput = qinput_pair->second;
-    // dq the input and copy it in to qinput
-    float input_min, input_max;
-    std::tie(input_min, input_max) = qnnpack_utils::GetMinMax(*input);
-
-    qnnpack_utils::QuantizationParams input_qparam;
-
-    int8_t qmin = std::numeric_limits<int8_t>::min();
-    int8_t qmax = std::numeric_limits<int8_t>::max();
-    Error e = qnnpack_utils::ChooseQuantizationParams(
-        input_min,
-        input_max,
-        qmin,
-        qmax,
-        input_qparam,
-        false, /* preserve_sparsity */
-        false, /* force_scale_power_of_two */
-        false /* reduce_range */
-    );
-    ET_CHECK_OR_RETURN_ERROR(
-        e == Error::Ok, Internal, "ChooseQuantizationParams() failed");
-
-    ET_CHECK_OR_RETURN_ERROR(
-        input_qparam.zero_point <= qmax && input_qparam.zero_point >= qmin,
-        Internal,
-        "ChooseQuantizationParams() selected invalid input_zero_point: %d",
-        input_qparam.zero_point);
-
-    e = qnnpack_utils::QuantizePerTensor<int8_t>(
-        *input, qinput, input_qparam.scale, input_qparam.zero_point);
-
-    size_t batch_size = 1;
-    for (int i = 0; i < input->dim() - 1; i++) {
-      batch_size *= input->size(i);
-    }
-    ET_CHECK_OR_RETURN_ERROR(
-        e == Error::Ok, Internal, "QuantizePerTensor() failed");
-    externals_.emplace_back(xnn_external_value{
-        id,
-        qinput.data_ptr(),
-        {static_cast<float>(input_qparam.scale),
-         static_cast<int8_t>(input_qparam.zero_point)},
-        batch_size});
-  } else {
-    externals_.emplace_back(xnn_external_value{id, input->data_ptr()});
-  }
+  externals_.emplace_back(xnn_external_value{id, input->data_ptr()});
  return Error::Ok;
}

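With the QNNPACK requantization path removed, `set_external_input` now just registers the tensor's raw data pointer as an `xnn_external_value`. For context, a hedged sketch of how such external values are consumed by the public XNNPACK runtime API (the helper and variable names are illustrative, not the PR's code):

```cpp
#include <xnnpack.h>

#include <vector>

// Sketch: hand input/output buffers to an XNNPACK runtime as external
// values, then run it. Each xnn_external_value pairs a value ID from the
// subgraph with a raw data pointer; no quantization happens here.
xnn_status run_with_externals(
    xnn_runtime_t runtime,
    uint32_t input_id, void* input_data,
    uint32_t output_id, void* output_data) {
  std::vector<xnn_external_value> externals = {
      {input_id, input_data},
      {output_id, output_data},
  };
  xnn_status status =
      xnn_setup_runtime(runtime, externals.size(), externals.data());
  if (status != xnn_status_success) {
    return status;
  }
  return xnn_invoke_runtime(runtime);
}
```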
6 changes: 2 additions & 4 deletions backends/xnnpack/serialization/xnnpack_graph_serialize.py
@@ -8,11 +8,9 @@
import os
import tempfile

-# pyre-ignore[21]: Could not find module `executorch.exir.serialize.bindings`.
-import executorch.exir.serialize.bindings as bindings  # @manual=//executorch/exir/serialize:bindings
-
import pkg_resources

+from executorch.exir.serialize._flatbuffer import _flatc_compile
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
from executorch.exir.serialize._dataclass import _DataclassEncoder

@@ -27,7 +25,7 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
        with open(json_path, "wb") as json_file:
            json_file.write(xnnpack_graph_json.encode("ascii"))
        # pyre-ignore
-        bindings.flatc_compile(d, schema_path, json_path)
+        _flatc_compile(d, schema_path, json_path)
        output_path = os.path.join(d, "schema.bin")
        with open(output_path, "rb") as output_file:
            return output_file.read()
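The bytes produced by `convert_to_flatbuffer` are what `XNNCompiler::compileModel` later verifies against the `fb_xnnpack` file identifier. A hedged C++ sketch of that identifier check using the generic flatbuffers API (the helper is illustrative; in the delegate the `expected` argument would be `fb_xnnpack::XNNGraphIdentifier()`):

```cpp
#include <flatbuffers/flatbuffers.h>

#include <cstdio>

// Sketch: check the 4-character flatbuffer file identifier before parsing.
// On mismatch, print both identifiers, mirroring the error message in
// XNNCompiler::compileModel above.
bool buffer_matches_schema(const void* buffer_pointer, const char* expected) {
  if (!flatbuffers::BufferHasIdentifier(buffer_pointer, expected)) {
    std::fprintf(
        stderr,
        "identifier '%.4s' != expected '%.4s'\n",
        flatbuffers::GetBufferIdentifier(buffer_pointer),
        expected);
    return false;
  }
  return true;
}
```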
10 changes: 3 additions & 7 deletions backends/xnnpack/targets.bzl
@@ -14,7 +14,7 @@ def define_common_targets():
"xnnpack_schema_generated.h": ["schema_generated.h"],
},
cmd = " ".join([
"$(exe fbsource//third-party/flatbuffers/fbsource_namespace:flatc)",
"$(exe {})".format(runtime.external_dep_location("flatc")),
"--cpp",
"--cpp-std c++11",
"--scoped-enums",
@@ -30,9 +30,7 @@ def define_common_targets():
        exported_headers = {
            "xnnpack_schema_generated.h": ":gen_xnnpack_schema[xnnpack_schema_generated.h]",
        },
-        exported_deps = [
-            "fbsource//third-party/flatbuffers/fbsource_namespace:flatbuffers-api",
-        ],
+        exported_external_deps = ["flatbuffers-api"],
    )

runtime.cxx_library(
@@ -51,11 +49,9 @@
"@EXECUTORCH_CLIENTS",
],
deps = [
"//xplat/third-party/XNNPACK:XNNPACK",
"//executorch/third-party:XNNPACK",
":xnnpack_schema",
"//executorch/runtime/backend:backend_registry",
"//executorch/backends/qnnpack:qnnpack_utils", # TODO Use (1) portable for choose_qparams(), (2) xnnpack for quantize_per_tensor()
"//executorch/extension/fb/threadpool:threadpool",
"//executorch/util:memory_utils",
"//executorch/runtime/core/exec_aten/util:tensor_util",
],
22 changes: 22 additions & 0 deletions backends/xnnpack/utils/xnnpack_constants.py
@@ -0,0 +1,22 @@
UINT32_MAX = 4294967295
XNN_EXTRA_BYTES = 16
XNN_MAX_TENSOR_DIMS = 6
XNN_FLAG_SPARSE_INFERENCE = 0x00000001
XNN_FLAG_HINT_SPARSE_INFERENCE = XNN_FLAG_SPARSE_INFERENCE
XNN_FLAG_FP16_INFERENCE = 0x00000002
XNN_FLAG_HINT_FP16_INFERENCE = XNN_FLAG_FP16_INFERENCE
XNN_FLAG_FORCE_FP16_INFERENCE = 0x00000004
XNN_FLAG_BASIC_PROFILING = 0x00000008
XNN_FLAG_DEPTHWISE_CONVOLUTION = 0x00000001
XNN_FLAG_TRANSPOSE_WEIGHTS = 0x00000001
XNN_FLAG_INPUT_NHWC = 0x00000002
XNN_FLAG_TENSORFLOW_SAME_PADDING = 0x00000004
XNN_FLAG_TENSORFLOW_RESHAPE_2D = 0x00000004
XNN_FLAG_TENSORFLOW_LEGACY_MODE = 0x00000004
XNN_FLAG_FP32_STATIC_WEIGHTS = 0x00000008
XNN_FLAG_ALIGN_CORNERS = 0x00000008
XNN_FLAG_YIELD_WORKERS = 0x00000010
XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001
XNN_VALUE_FLAG_EXTERNAL_OUTPUT = 0x00000002
XNN_VALUE_FLAG_PERSISTENT = 0x00000004
XNN_INVALID_VALUE_ID = UINT32_MAX
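These Python values mirror the flag constants in XNNPACK's public `xnnpack.h` header, so the Python serializer and the C++ runtime agree on their meaning. A hedged sketch of where one of them lands on the C++ side (`xnn_define_tensor_value` and `XNN_VALUE_FLAG_EXTERNAL_INPUT` are the public API; the helper itself is illustrative):

```cpp
#include <xnnpack.h>

// Sketch: define a subgraph tensor marked as an external input. The flag
// passed here is the C-side counterpart of the Python constant
// XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001 above.
xnn_status define_external_input(
    xnn_subgraph_t subgraph,
    const size_t* dims,
    size_t num_dims,
    uint32_t external_id,
    uint32_t* id_out) {
  return xnn_define_tensor_value(
      subgraph,
      xnn_datatype_fp32,
      num_dims,
      dims,
      /*data=*/nullptr,  // external inputs carry no static data
      external_id,
      XNN_VALUE_FLAG_EXTERNAL_INPUT,
      id_out);
}
```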
1 change: 1 addition & 0 deletions examples/executor_runner/targets.bzl
@@ -13,6 +13,7 @@ def define_common_targets():
        srcs = ["executor_runner.cpp"],
        deps = [
            "//executorch/runtime/executor/test:test_backend_compiler_lib",
+            "//executorch/backends/xnnpack:xnnpack_backend",
            "//executorch/runtime/executor:program",
            "//executorch/extension/data_loader:file_data_loader",
            "//executorch/util:util",
117 changes: 117 additions & 0 deletions examples/export/export_xnnpack_example.py
@@ -0,0 +1,117 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Example script for exporting simple models to flatbuffer

import argparse

import executorch.exir as exir
import torch
from executorch.backends.backend_api import to_backend, validation_disabled
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
)

from ..models import MODEL_NAME_TO_MODEL



def export_add_module_with_lower_graph():
    """
    AddModule:

    input -> torch.add -> output

    This module can be lowered to the XNNPACK backend as a delegate:

    input -> [lowered module (delegate)] -> output
    """

    class AddModule(torch.nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, x):
            return x + x

    capture_config = exir.CaptureConfig(pt2_mode=True, enable_dynamic_shape=False)
    edge_compile_config = exir.EdgeCompileConfig()
    sample_inputs = (torch.ones(1, 2, 3, 4),)
    print("Running the example to export a module with a lowered graph...")
    edge = exir.capture(AddModule(), sample_inputs, capture_config).to_edge(
        edge_compile_config
    )
    print("Exported graph:\n", edge.exported_program.graph)

    # Lower AddModule to the XNNPACK backend
    print("Lowering to the XNNPACK backend...")
    edge.exported_program = to_backend(
        edge.exported_program, XnnpackFloatingPointPartitioner
    )

    # The graph module is still runnable
    edge.exported_program.graph_module(*sample_inputs)

    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer

    model_name = "xnnpack_add"
    filename = f"{model_name}.ff"
    print(f"Saving exported program to {filename}")
    with open(filename, "wb") as file:
        file.write(buffer)


def export_mv2_with_lower_graph():
    mv2, example_inputs = MODEL_NAME_TO_MODEL["mv2"]()
    mv2 = mv2.eval()
    capture_config = exir.CaptureConfig(pt2_mode=True, enable_dynamic_shape=False)
    edge_compile_config = exir.EdgeCompileConfig(_check_ir_validity=False)

    edge = exir.capture(mv2, example_inputs, capture_config).to_edge(
        edge_compile_config
    )
    with validation_disabled():
        edge.exported_program = to_backend(
            edge.exported_program, XnnpackFloatingPointPartitioner
        )

    edge.exported_program.graph_module(*example_inputs)
    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer

    model_name = "xnnpack_mv2"
    filename = f"{model_name}.ff"
    print(f"Saving exported program to {filename}")
    with open(filename, "wb") as file:
        file.write(buffer)


OPTIONS_TO_LOWER = {
    "add": export_add_module_with_lower_graph,
    "mv2": export_mv2_with_lower_graph,
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--option",
        required=True,
        choices=list(OPTIONS_TO_LOWER.keys()),
        help=f"Provide the flow name. Valid ones: {list(OPTIONS_TO_LOWER.keys())}",
    )

    args = parser.parse_args()

    # Choose one option
    option = OPTIONS_TO_LOWER[args.option]

    # Run the example flow
    option()
13 changes: 8 additions & 5 deletions shim/xplat/executorch/build/env_interface.bzl
@@ -113,18 +113,21 @@ def _patch_headers(kwargs):

    # header_namespace is to workaround the fact that all C++ source files are having the pattern:
    # `include <executorch/.../*.h>` but BUCK2 root is at executorch/ so the `executorch/` prefix is redundant.
-    kwargs["header_namespace"] = "executorch/" + native.package_name()
+    if not kwargs["name"] in ["FXdiv", "pthreadpool_header", "interface", "operators", "hot", "subgraph", "ukernels_scalar", "tables"]:  # HACK do not commit!!!!
+        kwargs["header_namespace"] = "executorch/" + native.package_name()
    return kwargs

def _patch_pp_flags(kwargs):
    return kwargs

def _patch_cxx_compiler_flags(kwargs):
"""Compiler flags to enable C++17 features."""
if "compiler_flags" in kwargs:
kwargs["compiler_flags"].append("-std=c++17")
"""CXX Compiler flags to enable C++17 features."""
if "lang_compiler_flags" not in kwargs:
kwargs["lang_compiler_flags"] = {"cxx_cpp_output": ["-std=c++17"]}
elif "cxx_cpp_output" not in kwargs["lang_compiler_flags"]:
kwargs["lang_compiler_flags"]["cxx_cpp_output"] = ["-std=c++17"]
else:
kwargs["compiler_flags"] = ["-std=c++17"]
kwargs["lang_compiler_flags"]["cxx_cpp_output"].append("-std=c++17")
return kwargs

# buildifier: disable=unused-variable
1 change: 1 addition & 0 deletions third-party/FP16
Submodule FP16 added at 4dfe08
1 change: 1 addition & 0 deletions third-party/FXdiv
Submodule FXdiv added at b40832
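The new cpuinfo, FP16, FXdiv, and pthreadpool submodules are XNNPACK's standard build-time dependencies. As one example, FP16 is a header-only half-precision conversion library; a hedged sketch of its use (assuming the upstream `fp16.h` API):

```cpp
#include <fp16.h>

#include <cstdint>
#include <cstdio>

int main() {
  // Round-trip a float through IEEE fp16, the kind of conversion
  // XNNPACK's half-precision inference paths rely on FP16 for.
  uint16_t half = fp16_ieee_from_fp32_value(3.14159f);
  float back = fp16_ieee_to_fp32_value(half);
  std::printf("fp16 round-trip: %f\n", back);
  return 0;
}
```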