Skip to content

Commit 1d81cd0

Browse files
committed
[ET-VK] Enable Partial GPU lowering via Vulkan in stories model export
## Context

Simple change to add the Vulkan Partitioner as a dependency for the llama exporter and runner, and to provide a command-line flag that invokes the Vulkan partitioner during export. Also includes a small change to the Vulkan serializer that was needed for everything to work (i.e. enabling serialization of multiple graph outputs).

Differential Revision: [D54805831](https://our.internmc.facebook.com/intern/diff/D54805831/)
ghstack-source-id: 218336771
Pull Request resolved: #2368
1 parent 9bc9d81 commit 1d81cd0

File tree

4 files changed

+15
-7
lines changed

4 files changed

+15
-7
lines changed

backends/vulkan/serialization/vulkan_graph_builder.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,14 @@ def process_getattr_node(self, node: Node) -> None:
218218
self.create_tensor_values(node)
219219

220220
def process_output_node(self, node: Node) -> None:
221-
if node.all_input_nodes[0] not in self.node_to_value_ids:
222-
raise AssertionError(
223-
"Cannot find input to output node in node_to_value_ids. This means the "
224-
"output node is being serialized before its corresponding internal node "
225-
"which is not allowed."
226-
)
227-
self.output_ids.append(self.node_to_value_ids[node.all_input_nodes[0]])
221+
for out_node in node.all_input_nodes:
222+
if out_node not in self.node_to_value_ids:
223+
raise AssertionError(
224+
"Cannot find input to output node in node_to_value_ids. This means "
225+
"the output node is being serialized before its corresponding "
226+
"internal node which is not allowed."
227+
)
228+
self.output_ids.append(self.node_to_value_ids[out_node])
228229

229230
def process_node(self, node: Node) -> None:
230231
if node.op == "placeholder":

examples/models/llama2/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ runtime.python_library(
8282
"//executorch/backends/transforms:duplicate_dynamic_quant_chain",
8383
"//executorch/backends/xnnpack:xnnpack_backend",
8484
"//executorch/backends/xnnpack/partition:xnnpack_partitioner",
85+
"//executorch/backends/vulkan/partitioner:vulkan_partitioner",
8586
"//executorch/examples/models:model_base",
8687
"//executorch/examples/models:models",
8788
"//executorch/examples/portable:utils",

examples/models/llama2/export_llama_lib.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import pkg_resources
1919
import torch
20+
from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
2021
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
2122
XnnpackDynamicallyQuantizedPartitioner,
2223
)
@@ -356,6 +357,7 @@ def build_args_parser() -> argparse.ArgumentParser:
356357
parser.add_argument("-2", "--fairseq2", action="store_true")
357358
parser.add_argument("-v", "--verbose", action="store_true")
358359
parser.add_argument("-X", "--xnnpack", action="store_true")
360+
parser.add_argument("-V", "--vulkan", action="store_true")
359361

360362
return parser
361363

@@ -451,6 +453,9 @@ def _export_llama(modelname, args) -> str: # noqa: C901
451453
)
452454
# partitioners[XnnpackPartitioner.__name__] = XnnpackPartitioner()
453455
modelname = f"xnnpack_{modelname}"
456+
if args.vulkan:
457+
partitioners[VulkanPartitioner.__name__] = VulkanPartitioner()
458+
modelname = f"vulkan_{modelname}"
454459

455460
builder = (
456461
load_llama_model(

examples/models/llama2/runner/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def define_common_targets():
2929
],
3030
exported_deps = [
3131
"//executorch/backends/xnnpack:xnnpack_backend",
32+
"//executorch/backends/vulkan:vulkan_backend_lib",
3233
"//executorch/examples/models/llama2/sampler:sampler" + aten_suffix,
3334
"//executorch/examples/models/llama2/tokenizer:tokenizer",
3435
"//executorch/extension/evalue_util:print_evalue" + aten_suffix,

0 commit comments

Comments (0)