From aef99e39fda769b919c94c7d2e309b8fa36aa831 Mon Sep 17 00:00:00 2001
From: Max Ren
Date: Thu, 13 Mar 2025 23:59:56 -0700
Subject: [PATCH] [ExecuTorch][Weight Sharing] Track Named Data Store in EdgeProgramManager

Pull Request resolved: https://github.com/pytorch/executorch/pull/9151

We enable backends to return named data by adding a NamedDataStoreOutput field
to PreprocessResult. This is a fully backwards-compatible change: backends with
an existing preprocess implementation see no difference unless they explicitly
opt in.

To leverage the new NamedDataStore, backend developers can initialize a
NamedDataStore() within preprocess, add entries to it with add_named_data, and
return NamedDataStore.get_named_data_store_output() in the preprocess result,
like so:
```
def preprocess(
    exported_program: ExportedProgram,
    compile_specs: List[CompileSpec],
) -> PreprocessResult:
    named_data_store = NamedDataStore()

    for node in exported_program.graph.nodes:
        # "name" is the key; the second argument is the raw bytes payload.
        named_data_store.add_named_data("name", bytes)

    return PreprocessResult(
        processed_bytes=bytes,
        debug_handle_map={},
        data_store_output=named_data_store.get_named_data_store_output(),
    )
```
Under the hood, the data store output is embedded in the LoweredBackendModule
(serializing a LoweredBackendModule by itself with a named_data_store_output is
still a TODO). Via the EdgeProgramManager path, however, each backend's
named_data_store_output is merged into the EdgeProgramManager's named data
store, which keeps track of all the named data returned by backends.

ghstack-source-id: 271732049
@exported-using-ghexport

Differential Revision: [D70451660](https://our.internmc.facebook.com/intern/diff/D70451660/)
---
 exir/backend/backend_api.py                  |   1 +
 exir/backend/backend_details.py              |   7 ++
 exir/backend/test/TARGETS                    |  56 +++++++++
 .../test/backend_with_named_data_map.py      | 115 ++++++++++++++++++
 .../test/test_backend_with_named_data_map.py |  83 +++++++++++++
 exir/lowered_backend_module.py               |  14 +++
 exir/program/_program.py                     |  27 +++-
 7 files changed, 301 insertions(+), 2 deletions(-)
 create mode 100644 exir/backend/test/backend_with_named_data_map.py
 create mode 100644 exir/backend/test/test_backend_with_named_data_map.py

diff --git a/exir/backend/backend_api.py b/exir/backend/backend_api.py
index 519f184871a..d5bd574ec5a 100644
--- a/exir/backend/backend_api.py
+++ b/exir/backend/backend_api.py
@@ -120,6 +120,7 @@ def to_backend(
         backend_id=backend_id,
         processed_bytes=preprocess_result.processed_bytes,
         compile_specs=compile_specs,
+        named_data_store_output=preprocess_result.data_store_output,
     )
     lowered_module.meta = {
         "debug_handle_map": preprocess_result.debug_handle_map
diff --git a/exir/backend/backend_details.py b/exir/backend/backend_details.py
index bdbc1a1fafd..248d03f2b05 100644
--- a/exir/backend/backend_details.py
+++ b/exir/backend/backend_details.py
@@ -9,6 +9,8 @@

 from typing import Dict, List, Optional, Tuple, Union

+from executorch.exir._serialize._named_data_store import NamedDataStoreOutput
+
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram

@@ -24,6 +26,11 @@ class PreprocessResult:
     debug_handle_map: Optional[Union[Dict[int, Tuple[int]], Dict[str, Tuple[int]]]] = (
         None
     )
+    # Data store output created from a NamedDataStore.
+
+    # The named data store contains all the named data that is stored in the PTE
+    # file and is retrievable by delegates via the NamedDataMap at runtime.
+ data_store_output: Optional[NamedDataStoreOutput] = None """ diff --git a/exir/backend/test/TARGETS b/exir/backend/test/TARGETS index b453f4c722a..f0ba618936d 100644 --- a/exir/backend/test/TARGETS +++ b/exir/backend/test/TARGETS @@ -38,6 +38,62 @@ python_library( ], ) +python_library( + name = "backend_with_named_data_map", + srcs = [ + "backend_with_named_data_map.py", + ], + visibility = [ + "//executorch/...", + "//executorch/test/...", + ], + deps = [ + "//caffe2:torch", + "//caffe2/functorch:functorch_src", + "//executorch/exir:delegate", + "//executorch/exir:graph_module", + "//executorch/exir:lib", + "//executorch/exir:lowered_backend_module", + "//executorch/exir:print_program", + "//executorch/exir:schema", + "//executorch/exir/backend:backend_api", + "//executorch/exir/backend:compile_spec_schema", + "//executorch/exir/backend:partitioner", + "//executorch/exir/dialects:lib", + "//executorch/extension/pybindings:portable_lib", # @manual + "//executorch/extension/pytree:pylib", + "//executorch/exir/backend/canonical_partitioners:canonical_partitioner_lib", + ], +) + +python_unittest( + name = "test_backend_with_named_data_map", + srcs = [ + "test_backend_with_named_data_map.py", + ], + visibility = [ + "//executorch/...", + "//executorch/test/...", + ], + deps = [ + "//caffe2:torch", + "//caffe2/functorch:functorch_src", + "//executorch/exir:delegate", + "//executorch/exir:graph_module", + "//executorch/exir:lib", + "//executorch/exir:lowered_backend_module", + "//executorch/exir:print_program", + "//executorch/exir:schema", + "//executorch/exir/backend:backend_api", + "//executorch/exir/backend:compile_spec_schema", + "//executorch/exir/backend:partitioner", + "//executorch/exir/dialects:lib", + "//executorch/extension/pybindings:portable_lib", # @manual + "//executorch/extension/pytree:pylib", + ":backend_with_named_data_map", + ], +) + python_library( name = "qnn_backend_demo", srcs = [ diff --git a/exir/backend/test/backend_with_named_data_map.py b/exir/backend/test/backend_with_named_data_map.py new file mode 100644 index 00000000000..47dbc294133 --- /dev/null +++ b/exir/backend/test/backend_with_named_data_map.py @@ -0,0 +1,115 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict, final, List, Tuple + +import torch +from executorch.exir._serialize._named_data_store import NamedDataStore + +from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult +from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import ( + generate_pattern_op_partitions, +) + +from executorch.exir.backend.compile_spec_schema import CompileSpec +from executorch.exir.backend.partitioner import ( + DelegationSpec, + Partitioner, + PartitionResult, +) +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.graph_module import get_control_flow_submodules +from torch.export.exported_program import ExportedProgram +from torch.fx.passes.operator_support import OperatorSupportBase + + +# Backend details are final (cannot be subclassed). 
+@final
+class BackendWithNamedDataMap(BackendDetails):
+    """
+    Test backend for named data map functionality.
+
+    This backend returns no processed_bytes; instead it uses the named data
+    store and serializes the name of each op as the key and the op's code as
+    the data.
+    """
+
+    @staticmethod
+    def preprocess(
+        edge_program: ExportedProgram,
+        compile_specs: List[CompileSpec],
+    ) -> PreprocessResult:
+        op_codes = {
+            exir_ops.edge.aten.sin.default: 0,
+            exir_ops.edge.aten.add.Tensor: 1,
+            exir_ops.edge.aten.sub.Tensor: 2,
+            exir_ops.edge.aten.mul.Tensor: 3,
+            exir_ops.edge.aten.div.Tensor: 4,
+        }
+        ndm = NamedDataStore()
+        for node in edge_program.graph.nodes:
+            if node.op == "call_function":
+                if node.target in op_codes.keys():
+                    ndm.add_named_data(
+                        node.target.__name__, bytes(op_codes[node.target])
+                    )
+
+        return PreprocessResult(
+            processed_bytes=bytes(b""),
+            debug_handle_map={},
+            data_store_output=ndm.get_named_data_store_output(),
+        )
+
+
+class SimpleOperatorSupport(OperatorSupportBase):
+    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
+        return node.op == "call_function" and node.target in [
+            exir_ops.edge.aten.sin.default,
+            exir_ops.edge.aten.add.Tensor,
+            exir_ops.edge.aten.sub.Tensor,
+            exir_ops.edge.aten.mul.Tensor,
+            exir_ops.edge.aten.div.Tensor,
+        ]
+
+
+@final
+class BackendWithNDMPartitioner(Partitioner):
+    def __init__(self) -> None:
+        self._op_support = SimpleOperatorSupport()
+        self.backend_id = BackendWithNamedDataMap.__name__
+
+    def _partition_gm(
+        self, graph_module: torch.fx.GraphModule, id_start: int = 0
+    ) -> Tuple[int, Dict[str, DelegationSpec]]:
+        partition_tags: Dict[str, DelegationSpec] = {}
+        partition_list = generate_pattern_op_partitions(
+            graph_module, op_support=self._op_support
+        )
+
+        num_partitions_in_gm = len(partition_list)
+        for partition in partition_list:
+            curr_par_id = partition.id or 0
+            delegation_tag = f"tag_{curr_par_id + id_start}"
+            for node in partition.nodes:
+                node.meta["delegation_tag"] = delegation_tag
+            delegation_spec = DelegationSpec(self.backend_id, [])
+            partition_tags[delegation_tag] = delegation_spec
+
+        start_idx_for_submodules = num_partitions_in_gm
+        for _, submodule, _ in get_control_flow_submodules(graph_module):
+            start_idx_for_submodules, ret_partition_tags = self._partition_gm(
+                submodule, start_idx_for_submodules
+            )
+            partition_tags.update(ret_partition_tags)
+
+        return start_idx_for_submodules, partition_tags
+
+    def partition(self, edge_program: ExportedProgram) -> PartitionResult:
+        _, partition_tags = self._partition_gm(edge_program.graph_module)
+        return PartitionResult(
+            tagged_exported_program=edge_program,
+            partition_tags=partition_tags,
+        )
diff --git a/exir/backend/test/test_backend_with_named_data_map.py b/exir/backend/test/test_backend_with_named_data_map.py
new file mode 100644
index 00000000000..cc7aad641f0
--- /dev/null
+++ b/exir/backend/test/test_backend_with_named_data_map.py
@@ -0,0 +1,83 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+ +import unittest + +import torch + +from executorch.exir import to_edge +from executorch.exir.backend.backend_api import to_backend + +from executorch.exir.backend.test.backend_with_named_data_map import ( + BackendWithNamedDataMap, + BackendWithNDMPartitioner, +) + + +class TestBackendWithNamedDataMap(unittest.TestCase): + def test_lowered_backend_module_has_output(self): + class M(torch.nn.Module): + def forward(self, x): + return x + x + + ep = to_edge(torch.export.export(M(), (torch.randn(1, 2),))) + lowered = to_backend( + BackendWithNamedDataMap.__name__, ep.exported_program(), [] + ) + + buffer_entries = lowered.named_data_store_output.buffers + self.assertTrue(len(buffer_entries) == 1) + stored_data = lowered.named_data_store_output.pte_data + + self.assertTrue("aten.add.Tensor" in stored_data) + self.assertTrue(buffer_entries[0].buffer == bytes(1)) + + def test_named_data_with_partitioner(self): + class M(torch.nn.Module): + def forward(self, x): + y = x + x + y = torch.cos(y) + y = y + y + y = torch.sin(y) + return y - y + + ep = to_edge(torch.export.export(M(), (torch.randn(1, 2),))) + ep.to_backend(BackendWithNDMPartitioner()) + + ndm_output = ep._named_data_store.get_named_data_store_output() + buffer_entries = ndm_output.buffers + stored_data = ndm_output.pte_data + self.assertEqual(len(buffer_entries), 3) + self.assertTrue("aten.add.Tensor" in stored_data) + self.assertTrue("aten.sub.Tensor" in stored_data) + self.assertTrue("aten.sin.default" in stored_data) + + def test_named_data_with_control_flow(self): + class M(torch.nn.Module): + def true_branch(self, x): + y = x * x + y = torch.cos(y) + return torch.sin(y) + + def false_branch(self, x): + return torch.sin(x) + + def forward(self, x, y): + z = x / y + z = torch.cond(z.sum() > 0, self.true_branch, self.false_branch, [x]) + return z - z + + ep = to_edge(torch.export.export(M(), (torch.randn(1, 2), torch.randn(1, 2)))) + ep.to_backend(BackendWithNDMPartitioner()) + + ndm_output = ep._named_data_store.get_named_data_store_output() + buffer_entries = ndm_output.buffers + stored_data = ndm_output.pte_data + self.assertEqual(len(buffer_entries), 4) + self.assertTrue("aten.sub.Tensor" in stored_data) + self.assertTrue("aten.div.Tensor" in stored_data) + self.assertTrue("aten.sin.default" in stored_data) + self.assertTrue("aten.mul.Tensor" in stored_data) diff --git a/exir/lowered_backend_module.py b/exir/lowered_backend_module.py index dde6a397d9a..ed155555ef5 100644 --- a/exir/lowered_backend_module.py +++ b/exir/lowered_backend_module.py @@ -14,6 +14,7 @@ import torch import torch.utils._pytree as pytree from executorch.exir._serialize import _serialize_pte_binary +from executorch.exir._serialize._named_data_store import NamedDataStoreOutput from executorch.exir.backend.compile_spec_schema import CompileSpec from executorch.exir.delegate import executorch_call_delegate, get_lowered_module_name from executorch.exir.emit import emit_program @@ -62,6 +63,9 @@ class LoweredBackendModule(torch.nn.Module): CompileSpec ] # A list of backend-specific objects with static metadata to configure the "compilation" process. 
_original_exported_program: ExportedProgram # The original EXIR module + _named_data_store_output: Optional[ + NamedDataStoreOutput + ] # Named Data serialized by the backend def __init__( self, @@ -69,12 +73,14 @@ def __init__( backend_id: str, processed_bytes: bytes, compile_specs: List[CompileSpec], + named_data_store_output: Optional[NamedDataStoreOutput] = None, ) -> None: super().__init__() self._original_exported_program = edge_program self._backend_id = backend_id self._processed_bytes = processed_bytes self._compile_specs = compile_specs + self._named_data_store_output = named_data_store_output # pyre-ignore def __deepcopy__(self, memo: Optional[Dict[int, Any]]) -> "LoweredBackendModule": @@ -134,6 +140,13 @@ def original_module(self) -> ExportedProgram: """ return self._original_exported_program + @property + def named_data_store_output(self) -> Optional[NamedDataStoreOutput]: + """ + Returns the Named Data Store Output + """ + return self._named_data_store_output + # TODO(chenlai): consolidate the seriailization config with serialize_to_flatbuffer api def buffer( self, @@ -154,6 +167,7 @@ def buffer( segment_alignment=segment_alignment, constant_tensor_alignment=constant_tensor_alignment, delegate_alignment=delegate_alignment, + named_data=self.named_data_store_output, ) ) return out diff --git a/exir/program/_program.py b/exir/program/_program.py index ed9dace34d1..c00c003263f 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -26,6 +26,7 @@ from executorch.exir.backend.backend_api import to_backend from executorch.exir.backend.partitioner import Partitioner from executorch.exir.capture._config import EdgeCompileConfig, ExecutorchBackendConfig +from executorch.exir.delegate import executorch_call_delegate, is_lowered_module from executorch.exir.emit import emit_program, EmitterOutput from executorch.exir.emit._emitter import _DelegateDebugIdentifierMap from executorch.exir.error import ExportError @@ -1304,6 +1305,7 @@ def __init__( constant_methods: Optional[Dict[str, Any]] = None, compile_config: Optional[EdgeCompileConfig] = None, ops_set_to_not_decompose: Optional[List[torch._ops.OpOverload]] = None, + named_data_store: Optional[NamedDataStore] = None, ): """ Should not be called directly by users. User should use :func:'to_edge' instead. 
@@ -1327,7 +1329,7 @@ def __init__(

         self._edge_programs: Dict[str, ExportedProgram] = edge_programs
         self._config_methods = constant_methods
-        self._named_data_store = NamedDataStore()
+        self._named_data_store = named_data_store or NamedDataStore()

     @property
     def methods(self) -> Set[str]:
@@ -1437,9 +1439,30 @@ def to_backend(
         for name, program in self._edge_programs.items():
             new_edge_programs[name] = to_backend(program, partitioner)

+        # Collect all the named data into the named data store for deduplication.
+        def collect_named_data_store_outputs(
+            graph_module: torch.fx.GraphModule,
+        ) -> None:
+            for node in graph_module.graph.nodes:
+                if node.target == executorch_call_delegate:
+                    lbm = getattr(graph_module, node.args[0].name)
+                    assert is_lowered_module(lbm)
+                    data_store_output = lbm.named_data_store_output
+                    if data_store_output is not None:
+                        self._named_data_store.merge_named_data_store(data_store_output)
+
+            for _, submod, _ in get_control_flow_submodules(graph_module):
+                collect_named_data_store_outputs(submod)
+
+        for _, program in new_edge_programs.items():
+            collect_named_data_store_outputs(program.graph_module)
+
         config = EdgeCompileConfig(_check_ir_validity=False)
         return EdgeProgramManager(
-            new_edge_programs, copy.deepcopy(self._config_methods), config
+            new_edge_programs,
+            copy.deepcopy(self._config_methods),
+            config,
+            named_data_store=self._named_data_store,
         )

     @et_logger("to_executorch")
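
Example usage (illustrative only, not part of the diff): a minimal sketch of the end-to-end flow, mirroring the new tests above. It assumes the test-only BackendWithNDMPartitioner added in this patch is importable and, like the tests, reads the manager's internal _named_data_store.
```
import torch

from executorch.exir import to_edge
from executorch.exir.backend.test.backend_with_named_data_map import (
    BackendWithNDMPartitioner,
)


class AddSin(torch.nn.Module):
    def forward(self, x):
        return torch.sin(x + x)


# Export and convert to the Edge dialect.
edge = to_edge(torch.export.export(AddSin(), (torch.randn(1, 2),)))

# Partition and lower; each delegated partition's preprocess returns a
# NamedDataStoreOutput, which the EdgeProgramManager merges into its store.
edge = edge.to_backend(BackendWithNDMPartitioner())

# Inspect the named data collected from all backends (as the tests do).
named_data = edge._named_data_store.get_named_data_store_output()
print(sorted(named_data.pte_data.keys()))  # e.g. ['aten.add.Tensor', 'aten.sin.default']
```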