
Commit 925b5a8

Pull request pytorch#41: [EIEX-86] Include header with tensor format into byte payload produced by backend

Merge in AITEC/executorch from feature/nxf93343/EIEX-86-tensor-format-in-payload to main-nxp

* commit '8070682be46492a4cea03fa1d8960cc17f0dd586':
  [NO-UPSTREAM] Add tests for tensor format in payload
  Include header with tensor format into byte payload produced by backend

2 parents 195b0f5 + 8070682

4 files changed: +157 -66 lines changed

backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py

Lines changed: 7 additions & 4 deletions
@@ -81,22 +81,25 @@ def append_operators(self, ops_to_add: list[tflite_model.Operator]):
 
             self.check_and_append_operator(op)
 
-    def assign_model_io_to_subgraph_and_get_io_formats(self, graph_signature) -> dict[str, TensorFormat]:
+    def assign_model_io_to_subgraph_and_get_io_formats(self, graph_signature) -> dict[str, dict]:
         """
         Assign model's inputs/outputs to SubGraph.
 
         :param graph_signature: Instance of GraphSignature.
         :returns: Mapping between IO tensors' names and their formats.
         """
-        io_formats = {}
+        io_formats = {
+            "inputs": {},
+            "outputs": {},
+        }
 
         self.get_sub_graph().inputs = tflite_model.SubGraphInputs()
         for input_name in graph_signature.user_inputs:
             tensor = self.tensor_for_name(input_name)
             assert input_name == tensor.name, ("Program's input name doesn't match with tensor name in TFLite. "
                                                "Input was probably redirected.")
             self.get_sub_graph().inputs.tmp_inputs.append(tensor)
-            io_formats[tensor.name] = tensor.tensor_format
+            io_formats["inputs"][tensor.name] = tensor.tensor_format
 
         self.get_sub_graph().outputs = tflite_model.SubGraphOutputs()
         for output_name in graph_signature.user_outputs:
@@ -105,6 +108,6 @@ def assign_model_io_to_subgraph_and_get_io_formats(self, graph_signature) -> dic
                                                "Output was probably redirected.")
             self.get_sub_graph().outputs.tmp_outputs.append(tensor)
 
-            io_formats[tensor.name] = tensor.tensor_format
+            io_formats["outputs"][tensor.name] = tensor.tensor_format
 
         return io_formats
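
For reference, after this change the returned mapping is grouped by direction instead of being flat, so downstream code can tell input formats from output formats apart. A minimal sketch of the structure (tensor names are hypothetical; FORMATLESS follows the naming used in the payload docstring further down):

    # Hypothetical example of the dict now returned by
    # assign_model_io_to_subgraph_and_get_io_formats().
    io_formats = {
        "inputs": {"x": TensorFormat.CHANNELS_LAST},
        "outputs": {"output": TensorFormat.FORMATLESS},
    }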

backends/nxp/neutron_node_extraction.py

Lines changed: 10 additions & 45 deletions
@@ -5,28 +5,24 @@
 # LICENSE file in the root directory of this source tree.
 
 import logging
-import struct
+from dataclasses import dataclass
 
 import numpy as np
 
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import BuiltinOperator
 from executorch.backends.nxp.backend.ir.lib.tflite.Model import Model
-from executorch.exir.backend.backend_details import PreprocessResult
 
 
-def extract_artifacts_from_neutron_node(tflite_flatbuffer_or_path: bytes | str) -> PreprocessResult:
-    """ Extract the payload (microcode, weights, kernels) from the Neutron Node in the given TFLite model.
-        The model can be provided as a binary flatbuffer, or a path to a `.tflite` model.
+@dataclass
+class NeutronNodeArtifacts:
+    microcode: np.ndarray
+    weights: np.ndarray
+    kernels: np.ndarray
 
-        The return format is a `PreprocessResult` object, and its `processed_bytes` attribute contains the serialized
-        binary data of the following C struct:
-            struct NeutronBinary {
-                uint8[] microcode;
-                uint8[] weights;
-                uint8[] kernels;
-            }
 
-        The individual components must be aligned to 16 bytes.
+def extract_artifacts_from_neutron_node(tflite_flatbuffer_or_path: bytes | str) -> NeutronNodeArtifacts:
+    """ Extract the payload (microcode, weights, kernels) from the Neutron Node in the given TFLite model.
+        The model can be provided as a binary flatbuffer, or a path to a `.tflite` model.
     """
 
     if isinstance(tflite_flatbuffer_or_path, str):
@@ -77,35 +73,4 @@ def extract_artifacts_from_neutron_node(tflite_flatbuffer_or_path: bytes | str)
     assert microcode.dtype == weights.dtype == kernels.dtype == np.dtype('uint8'), \
         'The Neutron Node uses unexpected data types.'
 
-    # Align to 16B (according to commit 008bdc17670).
-    alignment = 16
-
-    def padding_format_string_for_array(array: np.ndarray) -> str:
-        """ Create a padding format string for the given array, which will add 0s at the end for correct alignment.
-            E.g. the string '10x' represents adding 10 bytes of '0' padding.
-        """
-        assert array.dtype == np.dtype('uint8')
-
-        overflow = array.size % alignment
-        if overflow == 0:
-            return ''
-
-        # Overflow 1 means padding 15, so use `alignment - overflow` padding.
-        return f'{alignment - overflow}x'
-
-    def format_string_for_array(array: np.ndarray) -> str:
-        """ Create a format string which will represent the provided array. It also handles the necessary alignment.
-            E.g. for array [1,2,3] we get '3s13x', because '3s' means string of 3 bytes, and `13x` means adding 13 bytes
-            of '0' padding at the end (for 16B alignment).
-        """
-        assert array.dtype == np.dtype('uint8')
-
-        return f'{array.size}s{padding_format_string_for_array(array)}'
-
-    # The resulting payload should be structured as a binary in the format defined in the function header.
-    payload = struct.pack(
-        format_string_for_array(microcode) + format_string_for_array(weights) + format_string_for_array(kernels),
-        microcode.tobytes(), weights.tobytes(), kernels.tobytes()
-    )
-
-    return PreprocessResult(processed_bytes=payload)
+    return NeutronNodeArtifacts(microcode, weights, kernels)
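
With the struct packing moved out of this module, callers now receive the raw uint8 arrays and decide how to serialize them. A minimal usage sketch, assuming a hypothetical model path:

    # Hypothetical usage of the refactored helper; the path is illustrative.
    artifacts = extract_artifacts_from_neutron_node("neutron_model.tflite")
    assert artifacts.microcode.dtype == artifacts.weights.dtype == artifacts.kernels.dtype
    print(artifacts.microcode.size, artifacts.weights.size, artifacts.kernels.size)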

backends/nxp/nxp_backend.py

Lines changed: 117 additions & 16 deletions
@@ -9,22 +9,20 @@
 #
 
 import logging
+import struct
 from typing import final, List, Optional
 
-from torch.export.exported_program import ExportedProgram
-
+import numpy as np
 import torch
+from torch.export.exported_program import ExportedProgram
 
 from executorch.backends.nxp.backend.edge_program_converter import EdgeProgramToIRConverter
 from executorch.backends.nxp.backend.ir.tensor_formatting import TensorFormat
 from executorch.backends.nxp.backend.neutron_converter_manager import NeutronConverterManager
-from executorch.backends.nxp.neutron_node_extraction import extract_artifacts_from_neutron_node
+from executorch.backends.nxp.neutron_node_extraction import extract_artifacts_from_neutron_node, NeutronNodeArtifacts
 from executorch.backends.xnnpack.passes import RemoveGetItemPass, XNNPACKPassManager
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
-
-
-from torch.export.exported_program import ExportedProgram
 from executorch.exir.verification.verifier import EXIREdgeDialectVerifier
 
 
@@ -132,13 +130,6 @@ def preprocess(
 
             # Convert the edge program to TFLite.
             tflite_model, io_formats = EdgeProgramToIRConverter().convert_program(edge_program)
-            for tensor, tensor_format in io_formats.items():
-                if tensor_format == TensorFormat.CHANNELS_LAST:
-                    channel_last_format = b'1'
-                else:
-                    channel_last_format = b'0'
-
-                compile_spec.append(CompileSpec(tensor, channel_last_format))
 
             # Call the neutron converter with the TFLite model.
             neutron_model = NeutronConverterManager().convert(tflite_model)
@@ -153,11 +144,121 @@ def preprocess(
                     f.write(bytes(neutron_model))
                 NeutronBackend.counter = NeutronBackend.counter + 1
 
-            # Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
-            payload = extract_artifacts_from_neutron_node(neutron_model)
-            binary = payload.processed_bytes
+            binary = PayloadComposer().get_binary_payload(io_formats, neutron_model)
 
         else:
             raise RuntimeError(f"Unknown format {output_format}")
 
         return PreprocessResult(processed_bytes=binary)
+
+
+class PayloadComposer:
+    ALIGNMENT = 16
+
+    def _padding_format_string_for_array(self, array: np.ndarray) -> str:
+        """ Create a padding format string for the given array, which will add 0s at the end for correct alignment.
+            E.g. the string '10x' represents adding 10 bytes of '0' padding.
+        """
+        assert array.dtype == np.dtype('uint8')
+
+        overflow = array.size % self.ALIGNMENT
+        if overflow == 0:
+            return ''
+
+        # Overflow 1 means padding 15, so use `alignment - overflow` padding.
+        return f'{self.ALIGNMENT - overflow}x'
+
+    def _format_string_for_array(self, array: np.ndarray) -> str:
+        """ Create a format string which will represent the provided array. It also handles the necessary alignment.
+            E.g. for array [1,2,3] we get '3s13x', because '3s' means string of 3 bytes, and `13x` means adding 13 bytes
+            of '0' padding at the end (for 16B alignment).
+        """
+        assert array.dtype == np.dtype('uint8')
+
+        return f'{array.size}s{self._padding_format_string_for_array(array)}'
+
+    def _create_payload_header(self, io_formats) -> np.ndarray:
+        """
+        Create bytes header for returned payload. It contains information about
+        input and output tensor formats. Tensors are ordered based on graph signature
+        of ExportedProgram. Header schema:
+
+        +----------------------------------+------------------------+---------------------------+
+        | Input TensorFormats length (1B)  | 1st tensor format (1B) | [nth* tensor format (1B)] |
+        +----------------------------------+------------------------+---------------------------+
+        | Output TensorFormats length (1B) | 1st tensor format (1B) | [nth* tensor format (1B)] |
+        +----------------------------------+------------------------+---------------------------+
+
+        :param io_formats: IO tensors formats.
+        :return: Bytes representation of payload header.
+        """
+        inputs = io_formats["inputs"]
+        outputs = io_formats["outputs"]
+
+        assert len(inputs) < 256, "Models with more than 255 inputs are not supported."
+        assert len(outputs) < 256, "Models with more than 255 outputs are not supported."
+
+        header_data = [len(inputs)]
+        for tensor, tensor_format in inputs.items():
+            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+
+        header_data.append(len(outputs))
+        for tensor, tensor_format in outputs.items():
+            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+
+        # noinspection PyTypeChecker
+        return np.array(header_data, dtype=np.uint8)
+
+    def _pack_with_alignment(self, header: np.ndarray, neutron_artifacts: NeutronNodeArtifacts) -> bytes:
+        """
+        Packs provided data into serialized binary data of the following C struct:
+            struct NeutronBinary {
+                uint8[] header;
+                uint8[] microcode;
+                uint8[] weights;
+                uint8[] kernels;
+            }
+        The individual components must be aligned to 16 bytes.
+        """
+
+        return struct.pack(
+            self._format_string_for_array(header) +
+            self._format_string_for_array(neutron_artifacts.microcode) +
+            self._format_string_for_array(neutron_artifacts.weights) +
+            self._format_string_for_array(neutron_artifacts.kernels),
+            header.tobytes(),
+            neutron_artifacts.microcode.tobytes(),
+            neutron_artifacts.weights.tobytes(),
+            neutron_artifacts.kernels.tobytes()
+        )
+
+    def get_binary_payload(self, io_formats, neutron_model) -> bytes:
+        """
+        Get binary payload for provided input/output tensor formats and neutron_model. Returned data have
+        following structure:
+
+        +---------------------------------------------------------------------------------------------------------------+
+        |                                           16 bytes aligned blocks                                              |
+        +===========================+===========================+============================+==========================+
+        | Input formats length (1B) | [nth* tensor format (1B)] | Output formats length (1B) | [nth* tensor format (1B)]|
+        +---------------------------+---------------------------+----------------------------+--------------------------+
+        |                                              Neutron microcode                                                 |
+        +---------------------------------------------------------------------------------------------------------------+
+        |                                              Neutron weights                                                   |
+        +---------------------------------------------------------------------------------------------------------------+
+        |                                              Neutron kernels                                                   |
+        +---------------------------------------------------------------------------------------------------------------+
+
+        Tensor format definition: '0x1' == CHANNELS_LAST, '0x0' == FORMATLESS (no format).
+
+        :param io_formats: Dictionary with keys 'inputs' and 'outputs' that contains dictionaries
+            mapping tensor name to TensorFormat.
+        :param neutron_model: Neutron model with single NeutronGraph node.
+        :return: 16 bytes aligned binary payload.
+        """
+        header = self._create_payload_header(io_formats)
+
+        # Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
+        neutron_artifacts = extract_artifacts_from_neutron_node(neutron_model)
+
+        return self._pack_with_alignment(header, neutron_artifacts)
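
The documented layout is enough to decode the header on the consumer side: byte 0 is the input count, followed by one format byte per input, then the output count and one format byte per output, with the header block zero-padded up to the next 16-byte boundary before the microcode begins. An illustrative parser sketch (not part of this commit; the function name is hypothetical):

    def parse_payload_header(payload: bytes) -> tuple[list[int], list[int], int]:
        # Hypothetical helper: decode the header written by PayloadComposer.get_binary_payload().
        num_inputs = payload[0]
        input_formats = list(payload[1:1 + num_inputs])             # 1 == CHANNELS_LAST, 0 == FORMATLESS
        num_outputs = payload[1 + num_inputs]
        output_formats = list(payload[2 + num_inputs:2 + num_inputs + num_outputs])

        header_len = 2 + num_inputs + num_outputs
        microcode_offset = -(-header_len // 16) * 16                # header block is padded to a multiple of 16 bytes
        return input_formats, output_formats, microcode_offset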

backends/nxp/tests/test_neutron_backend.py

Lines changed: 23 additions & 1 deletion
@@ -13,7 +13,7 @@
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import TFLiteExecutor, EdgeProgramExecutor, convert_run_compare, \
     ToNHWCPreprocess
-from executorch.backends.nxp.tests.models import Conv2dModule
+from executorch.backends.nxp.tests.models import Conv2dModule, SoftmaxModule
 from executorch.backends.nxp.tests.models import ConvFCSoftmaxModule
 
 
@@ -22,6 +22,28 @@ def test_neutron_backend__single_conv_model():
     lowered_module = edge_program_manager.exported_program().graph_module.lowered_module_0
     assert len(lowered_module.processed_bytes) != 0  # The Neutron microcode, weights and kernels have been written here
 
+def test_neutron_backend__single_conv_model__payload_header():
+    edge_program_manager = to_quantized_edge_program(Conv2dModule(bias=False), (1, 4, 32, 32))
+    payload = edge_program_manager.exported_program().graph_module.lowered_module_0.processed_bytes
+
+    assert payload[0] == 0x1  # Single input
+    assert payload[1] == 0x1  # Channels last
+    assert payload[2] == 0x1  # Single output
+    assert payload[3] == 0x1  # Channels last
+    assert all(byte == 0x0 for byte in payload[4:16])  # Aligned to 16 bytes
+    assert payload[17] != 0x0  # Followed by non-zero content
+
+def test_neutron_backend__single_softmax_model__payload_header():
+    edge_program_manager = to_quantized_edge_program(SoftmaxModule(1), (1, 64))
+    payload = edge_program_manager.exported_program().graph_module.lowered_module_0.processed_bytes
+
+    assert payload[0] == 0x1  # Single input
+    assert payload[1] == 0x0  # Formatless
+    assert payload[2] == 0x1  # Single output
+    assert payload[3] == 0x0  # Formatless
+    assert all(byte == 0x0 for byte in payload[4:16])  # Aligned to 16 bytes
+    assert payload[17] != 0x0  # Followed by non-zero content
+
 
 
 def test_lowered_program_and_tflite_output_match__conv2d__no_bias(mocker):
     converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
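
The two new tests exercise exactly the header bytes described above: a 4-byte header (1 input count, 1 input format, 1 output count, 1 output format) padded with zeros to the 16-byte boundary, followed by the Neutron microcode. They can be run on their own with pytest's keyword filter, e.g. "pytest backends/nxp/tests/test_neutron_backend.py -k payload_header" (assuming pytest with pytest-mock, which the existing mocker fixture already requires).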
