From 8ffca64ba372bc3cd51be4e952fb236d76a17cd2 Mon Sep 17 00:00:00 2001
From: DannyYuyang-quic <quic_yuyazhua@quicinc.com>
Date: Mon, 10 Feb 2025 16:50:21 +0800
Subject: [PATCH] Qualcomm AI Engine Direct - Support Qnn IR backend in online
 preparation

 - Support Qnn IR backend
 - Replace QCir with Dlc in online prepare flow
 - Add config for Saver backend
 - Block online preparation if the QNN version is below 2.30
 - Fix SDK version checking
 - Fix quant/dequant op breakage
 - Upgrade ANDROID_NATIVE_API_LEVEL from 23 to 30
 - Add comments for qat_training_data/passes_job
---
 .ci/scripts/build-qnn-sdk.sh                  |   1 +
 backends/qualcomm/CMakeLists.txt              |   7 +-
 .../qualcomm/aot/python/PyQnnManagerAdaptor.h |   9 +-
 backends/qualcomm/builders/op_dequantize.py   |   2 +-
 backends/qualcomm/builders/op_quantize.py     |   2 +-
 backends/qualcomm/qnn_preprocess.py           |   9 ++
 .../qualcomm/runtime/QnnExecuTorchBackend.cpp |   6 +-
 backends/qualcomm/runtime/QnnManager.cpp      | 118 ++++++++++++---
 backends/qualcomm/runtime/QnnManager.h        |  22 ++-
 .../qualcomm/runtime/backends/CMakeLists.txt  |  10 ++
 .../runtime/backends/QnnBackendCache.cpp      |  12 +-
 .../runtime/backends/QnnBackendCache.h        |   5 +
 .../runtime/backends/QnnBackendCommon.cpp     |  15 +-
 .../runtime/backends/QnnBackendCommon.h       |   5 +-
 .../runtime/backends/QnnBackendFactory.cpp    |  10 +-
 .../runtime/backends/QnnBackendFactory.h      |   5 +-
 .../runtime/backends/QnnContextCommon.cpp     |  18 ++-
 .../runtime/backends/QnnContextCommon.h       |  14 +-
 .../qualcomm/runtime/backends/QnnDlcManager.h |  71 +++++++++
 .../runtime/backends/QnnFunctionInterface.h   |   3 +
 .../runtime/backends/QnnGraphCommon.cpp       |   8 +-
 .../runtime/backends/QnnGraphCommon.h         |   6 +
 .../runtime/backends/QnnImplementation.cpp    |   6 -
 .../runtime/backends/QnnImplementation.h      |   7 +
 .../backends/htpbackend/HtpContext.cpp        |   1 -
 .../runtime/backends/htpbackend/HtpContext.h  |   6 +-
 .../runtime/backends/irbackend/IrBackend.h    |  39 +++++
 .../runtime/backends/irbackend/IrContext.h    |  28 ++++
 .../backends/irbackend/aarch64/IrContext.cpp  |  27 ++++
 .../irbackend/aarch64/QnnDlcManager.cpp       | 143 ++++++++++++++++++
 .../backends/irbackend/x86_64/IrContext.cpp   |  43 ++++++
 .../irbackend/x86_64/QnnDlcManager.cpp        | 139 +++++++++++++++++
 backends/qualcomm/scripts/build.sh            |   4 +-
 .../serialization/qc_compiler_spec.fbs        |   6 +
 backends/qualcomm/serialization/qc_schema.py  |   2 +
 backends/qualcomm/tests/test_qnn_delegate.py  | 116 ++++++++++----
 backends/qualcomm/tests/utils.py              | 110 +++++++++++++-
 backends/qualcomm/utils/utils.py              |  10 +-
 docs/source/backends-qualcomm.md              |   4 +-
 examples/qualcomm/test_qualcomm.sh            |   2 +-
 examples/qualcomm/utils.py                    |  14 +-
 41 files changed, 954 insertions(+), 111 deletions(-)
 create mode 100644 backends/qualcomm/runtime/backends/QnnDlcManager.h
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/IrBackend.h
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/IrContext.h
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp
 create mode 100644 backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp

diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
index f256e8eec6d..8237b70d03d 100644
--- a/.ci/scripts/build-qnn-sdk.sh
+++ b/.ci/scripts/build-qnn-sdk.sh
@@ -33,6 +33,7 @@ set_up_aot() {
   cmake .. \
       -DCMAKE_INSTALL_PREFIX=$PWD \
       -DEXECUTORCH_BUILD_QNN=ON \
+      -DANDROID_NATIVE_API_LEVEL=30 \
       -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
       -DEXECUTORCH_BUILD_DEVTOOLS=ON \
       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
index f5adc84f903..1b7c8891a4e 100644
--- a/backends/qualcomm/CMakeLists.txt
+++ b/backends/qualcomm/CMakeLists.txt
@@ -70,6 +70,7 @@ endif()
 
 include_directories(
   BEFORE ${_common_include_directories} ${QNN_SDK_ROOT}/include/QNN
+  ${QNN_SDK_ROOT}/share/QNN/converter/jni
   ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
   ${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10
 )
@@ -117,6 +118,7 @@ add_library(qnn_backend STATIC)
 add_library(qnn_backend_cache STATIC)
 add_library(qnn_context STATIC)
 add_library(qnn_custom_protocol STATIC)
+add_library(qnn_dlc_manager STATIC)
 add_library(qnn_device STATIC)
 add_library(qnn_executorch_backend SHARED)
 add_library(qnn_executorch_header INTERFACE)
@@ -174,8 +176,11 @@ target_link_libraries(
   qnn_factory PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
           qnn_mem_manager qnn_custom_protocol
 )
+
+target_link_libraries(qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context qnn_graph qnn_mem_manager)
+
 target_link_libraries(
-  qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer
+  qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer qnn_dlc_manager
 )
 target_link_libraries(
   qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager
diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
index 9914b11676e..67abadd6731 100644
--- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
+++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -195,7 +195,7 @@ class PyQnnManager {
       std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
     QnnExecuTorchContextBinary binary_info;
 
-    if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
+    if (qnn_manager_->IsMultipleGraphs()) {
       builder_.Reset();
       std::vector<uint8_t> tensor_data;
       std::vector<uint64_t> offsets;
@@ -305,8 +305,11 @@ class PyQnnManager {
         QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
         return py::array_t<char>(0);
       }
-      if (qnn_manager_->GetContextBinary(binary_info) !=
-          executorch::runtime::Error::Ok) {
+      auto qnn_executorch_options = GetQnnExecuTorchOptions(
+          qnn_executorch_option_ptr_.cast<std::string_view>().data());
+      if (qnn_executorch_options->saver() ||
+          qnn_manager_->GetContextBinary(binary_info) !=
+              executorch::runtime::Error::Ok) {
         return py::array_t<char>(0);
       }
     }
diff --git a/backends/qualcomm/builders/op_dequantize.py b/backends/qualcomm/builders/op_dequantize.py
index 507ecc4e3e3..722cdbe7957 100644
--- a/backends/qualcomm/builders/op_dequantize.py
+++ b/backends/qualcomm/builders/op_dequantize.py
@@ -45,7 +45,7 @@ def define_node(
         dequant_output_tensors = [output_tensor_wrapper]
 
         dequant_op = PyQnnWrapper.PyQnnOpWrapper(
-            node.target.__name__,
+            node.name,
             QNN_OP_PACKAGE_NAME_QTI_AISW,
             OpDequantize.op_name,
         )
diff --git a/backends/qualcomm/builders/op_quantize.py b/backends/qualcomm/builders/op_quantize.py
index 4921f96b467..e10f88795bb 100644
--- a/backends/qualcomm/builders/op_quantize.py
+++ b/backends/qualcomm/builders/op_quantize.py
@@ -52,7 +52,7 @@ def define_node(
         quant_output_tensors = [output_tensor_wrapper]
 
         quant_op = PyQnnWrapper.PyQnnOpWrapper(
-            node.target.__name__,
+            node.name,
             QNN_OP_PACKAGE_NAME_QTI_AISW,
             OpQuantize.op_name,
         )
diff --git a/backends/qualcomm/qnn_preprocess.py b/backends/qualcomm/qnn_preprocess.py
index 4a11bf050a2..63c1795c117 100644
--- a/backends/qualcomm/qnn_preprocess.py
+++ b/backends/qualcomm/qnn_preprocess.py
@@ -15,6 +15,9 @@
 from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors
 from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
 from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option
+from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
+    flatbuffer_to_option,
+)
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     CompileSpec,
@@ -92,6 +95,12 @@ def preprocess(
             qnn_manager.GetGraphNames()[0],
             [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrapper_list],
         )
+
+        obj_options = flatbuffer_to_option(option)
+        if obj_options.saver:
+            exit(
+                f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}"
+            )
         assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
         qnn_manager.Destroy()
         # For now, debug_handle_map is not used by QNN ExecuTorch
diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
index 32d82950908..ab038404582 100644
--- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
+++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
@@ -36,7 +36,6 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   // covert SizedBuffer to qnn ExecuTorch option
   QnnExecuTorchContextBinary qnn_context_blob;
   const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
-
   auto [status, signature, ctx_size, ctx_bin] =
       QnnContextCustomProtocol().DeserializeContextCustomBuffer(
           const_cast<void*>(processed->data()));
@@ -74,7 +73,6 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   // NOTE: Since we use placement new and since this type is not trivially
   // destructible, we must call the destructor manually in destroy().
   new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
-
   // TODO: this is a temporal solution for multi-graph support, will be
   //       removed once framework starts to accept runtime configuration
   // ---
@@ -96,9 +94,9 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
 
   if (qnn_manager->IsOnlinePrepare()) {
     ET_CHECK_OR_RETURN_ERROR(
-        qnn_manager->CompileQcir() == Error::Ok,
+        qnn_manager->CompileDlc() == Error::Ok,
         Internal,
-        "Fail to compile binary in qcir format");
+        "Fail to compile binary in Dlc format");
   } else {
     for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
       ET_CHECK_OR_RETURN_ERROR(
diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp
index 994cc1931cc..13718b0891a 100644
--- a/backends/qualcomm/runtime/QnnManager.cpp
+++ b/backends/qualcomm/runtime/QnnManager.cpp
@@ -37,9 +37,7 @@ bool CompareExportedInput(
 }
 
 QnnManager::~QnnManager() {
-  backend_params_ptr_.reset(new BackendConfigParameters());
-  logger_.reset();
-  qnn_loaded_backend_.TerminateAllBackends();
+  Destroy();
 }
 
 QnnManager::QnnManager(
@@ -96,10 +94,14 @@ QnnManager::QnnManager(
   }
   qnn_loaded_backend_ = QnnImplementation(library_path);
   backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
+
+  qnn_dlc_manager_ =
+      std::make_shared<QnnDlcManager>(qnn_context_blob_, options_);
 }
 
 Error QnnManager::LoadQnnLibrary() {
-  Error ret = qnn_loaded_backend_.Load(nullptr);
+  auto config = GetImplementationConfig();
+  Error ret = qnn_loaded_backend_.Load(config.get());
   return ret;
 }
 
@@ -286,7 +288,11 @@ Error QnnManager::Init() {
         "parameters for Qnn executorch backend type %d",
         options_->backend_options()->backend_type());
     backend_params_ptr_ = QnnBackendFactory().Create(
-        qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
+        qnn_loaded_backend_,
+        logger_.get(),
+        qnn_context_blob_,
+        options_,
+        qnn_dlc_manager_.get());
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_ != nullptr,
         Internal,
@@ -326,6 +332,18 @@ Error QnnManager::Init() {
       Internal,
       "Fail to pre register custom memory handle");
 #endif
+
+  if (IsOnlinePrepare()) {
+    Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT};
+    qnn_loaded_backend_.GetQnnInterface().qnn_backend_get_api_version(
+        &qnn_version);
+
+    ET_CHECK_OR_RETURN_ERROR(
+        qnn_dlc_manager_->SetUpDlcEnvironment(qnn_version.coreApiVersion) ==
+            Error::Ok,
+        Internal,
+        "Fail to setup Dlc environment");
+  }
   return Error::Ok;
 }
 
@@ -446,9 +464,11 @@ Error QnnManager::ProfileExecuteData(
 void QnnManager::Destroy() {
   QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
   backend_params_ptr_.reset(new BackendConfigParameters());
+  qnn_dlc_manager_->ResetBackendParams();
   logger_.reset();
-
+  qnn_dlc_manager_->ResetLogger();
   qnn_loaded_backend_.TerminateAllBackends();
+  qnn_dlc_manager_->TerminateAllBackends();
 }
 
 bool QnnManager::IsNodeSupportedByBackend(
@@ -483,11 +503,64 @@ bool QnnManager::IsNodeSupportedByBackend(
 
 Error QnnManager::GetContextBinary(
     QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
-  ET_CHECK_OR_RETURN_ERROR(
-      backend_params_ptr_->qnn_context_ptr_->GetContextBinary(
-          qnn_executorch_context_binary) == Error::Ok,
-      Internal,
-      "Fail to get context binary.");
+  // Default to this manager's own context. During online prepare, switch to
+  // the DLC manager's context whenever that manager holds one, so the binary
+  // is fetched from there instead.
+  auto* context_ptr = backend_params_ptr_->qnn_context_ptr_.get();
+  if (IsOnlinePrepare()) {
+    auto* dlc_context_ptr =
+        qnn_dlc_manager_->backend_params_ptr_->qnn_context_ptr_.get();
+    if (dlc_context_ptr != nullptr) {
+      context_ptr = dlc_context_ptr;
+    }
+  }
+
+  ET_CHECK_OR_RETURN_ERROR(
+      context_ptr->GetContextBinary(qnn_executorch_context_binary) ==
+          Error::Ok,
+      Internal,
+      "Fail to get context binary.");
+  return Error::Ok;
+}
+
+// Finalize every graph held by the DLC manager, then allocate its I/O tensors.
+Error QnnManager::CompileDlc() {
+  auto qnn_dlc_graph_info = qnn_dlc_manager_->GetQnnDlcGraphInfoPtr();
+  uint32_t qnn_dlc_graph_info_num = qnn_dlc_manager_->GetQnnDlcGraphInfoNum();
+  for (uint32_t i = 0; i < qnn_dlc_graph_info_num; ++i) {
+    auto& graphInfo = (*qnn_dlc_graph_info)[i];
+    backend_params_ptr_->qnn_graph_ptr_->SetGraphHandle(
+        graphInfo.graphName, graphInfo.graph);
+    Qnn_ErrorHandle_t error =
+        backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graphInfo.graphName);
+    if (error != QNN_SUCCESS) {
+      QNN_EXECUTORCH_LOG_ERROR(
+          "Failed to finalize Qnn Graph with error: %d",
+          QNN_GET_ERROR_CODE(error));
+      return Error::Internal;
+    }
+
+    // Tensor loops use their own unsigned index so graph index `i` stays intact.
+    std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs;
+
+    for (uint32_t j = 0; j < graphInfo.numInputTensors; ++j) {
+      auto tw = CreateTensorWrapper(graphInfo.inputTensors[j]);
+      tw->UpdateQnnTensorMeta(graphInfo.inputTensors[j]);
+      graph_inputs.push_back(tw);
+    }
+    for (uint32_t j = 0; j < graphInfo.numOutputTensors; ++j) {
+      auto tw = CreateTensorWrapper(graphInfo.outputTensors[j]);
+      tw->UpdateQnnTensorMeta(graphInfo.outputTensors[j]);
+      graph_outputs.push_back(tw);
+    }
+
+    ET_CHECK_OR_RETURN_ERROR(
+        AllocateTensor(graphInfo.graphName, graph_inputs, graph_outputs) ==
+            Error::Ok,
+        Internal,
+        "Fail to allocate tensor for Dlc with graph_name: %s",
+        graphInfo.graphName);
+  }
 
   return Error::Ok;
 }
@@ -616,31 +689,34 @@ Error QnnManager::Compile(
     const std::string& graph_name,
     std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
   Qnn_ErrorHandle_t error = QNN_SUCCESS;
+  QnnGraph* qnn_graph_ptr = backend_params_ptr_->qnn_graph_ptr_.get();
 
+  if (IsOnlinePrepare() &&
+      qnn_dlc_manager_->backend_params_ptr_->qnn_graph_ptr_.get() != nullptr) {
+    qnn_graph_ptr = qnn_dlc_manager_->backend_params_ptr_->qnn_graph_ptr_.get();
+  }
   for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
     for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
       ET_CHECK_OR_RETURN_ERROR(
-          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
-              graph_name, tensor_wrapper) == Error::Ok,
+          qnn_graph_ptr->EnsureTensorInQnnGraph(graph_name, tensor_wrapper) ==
+              Error::Ok,
           Internal,
           "Tensor name %s isn't added to Qnn Graph",
           tensor_wrapper->GetName().c_str());
     }
-
     for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
       ET_CHECK_OR_RETURN_ERROR(
-          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
-              graph_name, tensor_wrapper) == Error::Ok,
+          qnn_graph_ptr->EnsureTensorInQnnGraph(graph_name, tensor_wrapper) ==
+              Error::Ok,
           Internal,
           "Tensor name %s isn't added to Qnn Graph",
           tensor_wrapper->GetName().c_str());
     }
-
     for (const auto& param : op_wrapper->GetParams()) {
       auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
       if (p_tensor_param != nullptr) {
         ET_CHECK_OR_RETURN_ERROR(
-            backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
+            qnn_graph_ptr->EnsureTensorInQnnGraph(
                 graph_name, p_tensor_param->GetTensorWrapper()) == Error::Ok,
             Internal,
             "Param tensor name %s isn't added to Qnn Graph",
@@ -652,8 +728,7 @@ Error QnnManager::Compile(
           "Fail to configure Qnn backend");
     }
 
-    error = backend_params_ptr_->qnn_graph_ptr_->GraphAddNode(
-        graph_name, op_wrapper->GetOpConfig());
+    error = qnn_graph_ptr->GraphAddNode(graph_name, op_wrapper->GetOpConfig());
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_ERROR(
           "Failed to add node to Qnn Graph with error: %d",
@@ -661,14 +736,13 @@ Error QnnManager::Compile(
       return Error::Internal;
     }
   }
-  error = backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graph_name);
+  error = qnn_graph_ptr->GraphFinalize(graph_name);
   if (error != QNN_SUCCESS) {
     QNN_EXECUTORCH_LOG_ERROR(
         "Failed to finalize Qnn Graph with error: %d",
         QNN_GET_ERROR_CODE(error));
     return Error::Internal;
   }
-
   return Error::Ok;
 }
 
diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h
index 17294afbd88..ee9c4337532 100644
--- a/backends/qualcomm/runtime/QnnManager.h
+++ b/backends/qualcomm/runtime/QnnManager.h
@@ -13,6 +13,7 @@
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
 #include <executorch/runtime/core/error.h>
 
 #include <memory>
@@ -71,7 +72,7 @@ class QnnManager {
       QnnExecuTorchContextBinary& qnn_executorch_context_binary);
 
   executorch::runtime::Error CompileQcir();
-
+  executorch::runtime::Error CompileDlc();
   executorch::runtime::Error Compile(
       const std::string& graph_name,
       std::vector<std::shared_ptr<OpWrapper>>& op_wrappers);
@@ -110,6 +111,22 @@ class QnnManager {
   std::string GetBinarySignature();
 
  private:
+  // Null-terminated config array for qnn_loaded_backend_.Load(): the saver
+  // output directory when options_->saver() is set, nullptr otherwise.
+  // NOTE(review): the released QnnSaver_Config_t is never deleted (leak).
+  std::unique_ptr<const QnnSaver_Config_t*[]> GetImplementationConfig() {
+    if (!options_->saver()) {
+      return nullptr;
+    }
+    auto saver_dir_cfg = std::make_unique<QnnSaver_Config_t>();
+    saver_dir_cfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY;
+    saver_dir_cfg->outputDirectory = options_->saver_output_dir()->c_str();
+    auto cfg_list = std::make_unique<const QnnSaver_Config_t*[]>(2);
+    cfg_list[0] = saver_dir_cfg.release();
+    cfg_list[1] = nullptr;
+    return cfg_list;
+  }
+
   executorch::runtime::Error LoadQnnLibrary();
 
   static constexpr const char* htp_library_name_ = "libQnnHtp.so";
@@ -147,6 +164,9 @@ class QnnManager {
           {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16,
            executorch::aten::ScalarType::UInt16},
   };
+
+  // Manager for handling DLC (Deep Learning Container)
+  std::shared_ptr<QnnDlcManager> qnn_dlc_manager_;
 };
 } // namespace qnn
 } // namespace backends
diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt
index 2df38086133..e4e0f6ada16 100644
--- a/backends/qualcomm/runtime/backends/CMakeLists.txt
+++ b/backends/qualcomm/runtime/backends/CMakeLists.txt
@@ -67,10 +67,12 @@ target_sources(
   qnn_context
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h
          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h
+         ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
           ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp
           ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h
           ${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp
 )
 
 # qnn_backend_cache
@@ -99,6 +101,7 @@ target_sources(
   qnn_backend
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h
          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h
+         ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp
 )
 
@@ -122,3 +125,10 @@ target_sources(
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.cpp
 )
+
+# qnn_dlc_manager
+target_sources(
+  qnn_dlc_manager
+  PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h
+  PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp
+)
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
index 699e0646697..1e6b1262c3a 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
+++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
@@ -134,20 +134,16 @@ Error QnnBackendCache::Configure() {
         QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(
             qnn_context_blob_.buffer);
     if (status == Error::Ok) {
-      // online prepare or first stage of multi graph
-      state_ = ONLINE_PREPARE;
+      // first stage of multi graph
+      state_ = MULTI_GRAPH;
       auto context = qcir::GetContext(qcir_fbs_ptr);
       for (const auto& graph : *context->graphs()) {
         graph_names_.emplace_back(graph->name()->str());
       }
       return Error::Ok;
     }
-
-    QNN_EXECUTORCH_LOG_ERROR(
-        "Failed to parse QNN Graph Info. The cache "
-        "might be broken. Please consider to re-generate the "
-        "cache.");
-    InvalidateCache();
+    // online prepare
+    state_ = ONLINE_PREPARE;
   }
   return Error::Ok;
 }
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.h b/backends/qualcomm/runtime/backends/QnnBackendCache.h
index b9e00f0a662..9abec186c3a 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendCache.h
+++ b/backends/qualcomm/runtime/backends/QnnBackendCache.h
@@ -24,6 +24,7 @@ class QnnBackendCache {
     SERIALIZE = 1,
     DESERIALIZE = 2,
     ONLINE_PREPARE = 3,
+    MULTI_GRAPH = 4,
   };
   explicit QnnBackendCache(
       const QnnExecuTorchContextBinary& qnn_context_blob,
@@ -55,6 +56,10 @@ class QnnBackendCache {
     return graph_names_;
   }
 
+  void SetGraphNames(const std::string& graph_name) {
+    graph_names_.push_back(graph_name); // appends; existing names are kept
+  }
+
   executorch::runtime::Error Configure();
 
  protected:
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp
index 0df40ddb4e5..310e38d1744 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp
+++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp
@@ -57,8 +57,7 @@ Error QnnBackend::Configure() {
   return Error::Ok;
 }
 
-Error QnnBackend::VerifyQNNSDKVersion(
-    const QnnExecuTorchBackendType backend_id) {
+Error QnnBackend::VerifyQNNSDKVersion() {
   const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
 
   Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT};
@@ -73,8 +72,16 @@ Error QnnBackend::VerifyQNNSDKVersion(
   expected_version.coreApiVersion.major = QNN_API_VERSION_MAJOR;
   expected_version.coreApiVersion.minor = QNN_API_VERSION_MINOR;
   expected_version.coreApiVersion.patch = QNN_API_VERSION_PATCH;
-  expected_version.backendApiVersion = GetExpectedBackendVersion();
-  const char* backend_type = EnumNameQnnExecuTorchBackendType(backend_id);
+  expected_version.backendApiVersion = QNN_VERSION_INIT;
+  if (qnn_interface.GetBackendId() == QNN_BACKEND_ID_SAVER) {
+    expected_version.backendApiVersion.major = QNN_SAVER_API_VERSION_MAJOR;
+    expected_version.backendApiVersion.minor = QNN_SAVER_API_VERSION_MINOR;
+    expected_version.backendApiVersion.patch = QNN_SAVER_API_VERSION_PATCH;
+  } else {
+    expected_version.backendApiVersion = GetExpectedBackendVersion();
+  }
+  const char* backend_type = EnumNameQnnExecuTorchBackendType(
+      static_cast<QnnExecuTorchBackendType>(qnn_interface.GetBackendId()));
 
   Error status = VersionChecker(
       qnn_version.coreApiVersion, expected_version.coreApiVersion, "Qnn API");
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.h b/backends/qualcomm/runtime/backends/QnnBackendCommon.h
index 56b5284c537..58bdee10846 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendCommon.h
+++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.h
@@ -17,6 +17,8 @@
 #include "QnnBackend.h"
 #include "QnnCommon.h"
 #include "QnnTypes.h"
+#include "Saver/QnnSaverCommon.h"
+
 namespace executorch {
 namespace backends {
 namespace qnn {
@@ -45,8 +47,7 @@ class QnnBackend {
     return handle_;
   }
 
-  executorch::runtime::Error VerifyQNNSDKVersion(
-      const QnnExecuTorchBackendType backend_id);
+  executorch::runtime::Error VerifyQNNSDKVersion();
 
  protected:
   virtual Qnn_Version_t GetExpectedBackendVersion() const = 0;
diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
index 29e6686740b..1f251aeaffa 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
+++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -7,6 +7,7 @@
  */
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
@@ -17,7 +18,8 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
     const QnnImplementation& implementation,
     QnnLogger* logger,
     const QnnExecuTorchContextBinary& qnn_context_blob,
-    const QnnExecuTorchOptions* options) {
+    const QnnExecuTorchOptions* options,
+    QnnDlcManager* qnn_dlc_manager) {
   auto backend_params = std::make_unique<BackendConfigParameters>();
 
   switch (options->backend_options()->backend_type()) {
@@ -68,7 +70,8 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
           backend_params->qnn_backend_ptr_.get(),
           backend_params->qnn_device_ptr_.get(),
           backend_params->qnn_backend_cache_ptr_.get(),
-          htp_options);
+          htp_options,
+          qnn_dlc_manager);
 
       backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
           implementation,
@@ -88,8 +91,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
       return nullptr;
   }
 
-  if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion(
-          options->backend_options()->backend_type()) == Error::Ok) {
+  if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) {
     return backend_params;
   }
 
diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h
index 012c2cc7b5b..3d78a36b9f0 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h
+++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h
@@ -27,6 +27,8 @@
 namespace executorch {
 namespace backends {
 namespace qnn {
+
+class QnnDlcManager;
 typedef enum { UNINITIALIZED, INITIALIZED } BackendInitializeState;
 
 // @brief Struct containing all handles for a given QNN backend
@@ -67,7 +69,8 @@ class QnnBackendFactory {
       const QnnImplementation& implementation,
       QnnLogger* logger,
       const QnnExecuTorchContextBinary& qnn_context_blob,
-      const QnnExecuTorchOptions* options);
+      const QnnExecuTorchOptions* options,
+      QnnDlcManager* qnn_dlc_manager);
 };
 } // namespace qnn
 } // namespace backends
diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp
index 7c66e5ad19a..ee49b10215a 100644
--- a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp
+++ b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp
@@ -7,12 +7,12 @@
  */
 
 #include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
+
 namespace executorch {
 namespace backends {
 namespace qnn {
 
-using executorch::runtime::Error;
-
 QnnContext::~QnnContext() {
   const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
   Qnn_ErrorHandle_t error = QNN_SUCCESS;
@@ -63,13 +63,13 @@ Error QnnContext::Configure() {
     }
   } else if (
       cache_->GetCacheState() == QnnBackendCache::SERIALIZE ||
-      cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) {
+      cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE ||
+      cache_->GetCacheState() == QnnBackendCache::MULTI_GRAPH) {
     error = qnn_interface.qnn_context_create(
         backend_->GetHandle(),
         device_->GetHandle(),
         temp_context_config.empty() ? nullptr : temp_context_config.data(),
         &handle_);
-
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_ERROR(
           "Failed to create QNN context for Backend "
@@ -82,7 +82,15 @@ Error QnnContext::Configure() {
     QNN_EXECUTORCH_LOG_ERROR("QNN context cache is invalid.");
     return Error::Internal;
   }
-  return AfterConfigure();
+  if (AfterConfigure() != Error::Ok) {
+    return Error::Internal;
+  }
+  if (cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) {
+    // Register graphs from DLC during online prepare for HTP/GPU/DSP backends
+    return qnn_dlc_manager_->RegisterGraphsFromDLC(
+        implementation_, backend_, this, cache_);
+  }
+  return Error::Ok;
 }
 
 Error QnnContext::GetContextBinary(
diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.h b/backends/qualcomm/runtime/backends/QnnContextCommon.h
index 62a0b953eec..0e9e12ef544 100644
--- a/backends/qualcomm/runtime/backends/QnnContextCommon.h
+++ b/backends/qualcomm/runtime/backends/QnnContextCommon.h
@@ -14,23 +14,30 @@
 #include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
 
 #include <memory>
+
 namespace executorch {
 namespace backends {
 namespace qnn {
+
+class QnnDlcManager;
+
 class QnnContext {
  public:
   explicit QnnContext(
       const QnnImplementation& implementation,
       QnnBackend* backend,
       QnnDevice* device,
-      QnnBackendCache* cache)
+      QnnBackendCache* cache,
+      QnnDlcManager* qnn_dlc_manager)
       : handle_(nullptr),
         implementation_(implementation),
         backend_(backend),
         device_(device),
-        cache_(cache) {}
+        cache_(cache),
+        qnn_dlc_manager_(qnn_dlc_manager) {}
 
   virtual ~QnnContext();
+
   executorch::runtime::Error Configure();
 
   Qnn_ContextHandle_t GetHandle() const {
@@ -53,7 +60,7 @@ class QnnContext {
     return cache_->GetCacheState();
   };
 
-  executorch::runtime::Error GetContextBinary(
+  virtual executorch::runtime::Error GetContextBinary(
       QnnExecuTorchContextBinary& qnn_executorch_context_binary);
 
  protected:
@@ -72,6 +79,7 @@ class QnnContext {
   QnnDevice* device_;
   QnnBackendCache* cache_;
   QnnContextCustomProtocol qnn_context_custom_protocol_;
+  QnnDlcManager* qnn_dlc_manager_;
 };
 } // namespace qnn
 } // namespace backends
diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h
new file mode 100644
index 00000000000..a57906df4e3
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
+
+#include <QnnTypes.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
+#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+
+#include "QnnWrapperUtils.hpp"
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+using QnnModel_composeGraphsFromDlc = qnn_wrapper_api::ModelError_t (*)(...);
+class QnnDlcManager {
+ public:
+  // Keeps a reference to qnn_context_blob, which must outlive this object.
+  QnnDlcManager(
+      const QnnExecuTorchContextBinary& qnn_context_blob,
+      const QnnExecuTorchOptions* options);
+
+  // Graph info array filled by RegisterGraphsFromDLC (nullptr before that).
+  qnn_wrapper_api::GraphInfoPtr_t* GetQnnDlcGraphInfoPtr() const {
+    return qnn_dlc_graph_info_;
+  }
+
+  uint32_t GetQnnDlcGraphInfoNum() const {
+    return qnn_dlc_graph_info_num_;
+  }
+
+  std::unique_ptr<BackendConfigParameters> backend_params_ptr_ =
+      std::make_unique<BackendConfigParameters>();
+
+  void ResetBackendParams();
+  void ResetLogger();
+  void TerminateAllBackends();
+
+  // Sets up the IR backend (x86_64 build); the aarch64 build is a no-op.
+  Error SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion);
+
+  // Composes graphs from the cached DLC blob (no-op in the x86_64 build).
+  Error RegisterGraphsFromDLC(
+      const QnnImplementation& implementation,
+      QnnBackend* backend,
+      QnnContext* context,
+      QnnBackendCache* cache);
+
+ private:
+  static constexpr const char* library_name_ = "libQnnIr.so";
+  QnnImplementation qnn_loaded_backend_;
+  std::unique_ptr<QnnLogger> logger_;
+  const QnnExecuTorchContextBinary& qnn_context_blob_; // not owned
+  const QnnExecuTorchOptions* options_; // not owned
+  static constexpr const char* dlc_lib_ = "libQnnModelDlc.so";
+  qnn_wrapper_api::GraphInfoPtr_t* qnn_dlc_graph_info_ = nullptr;
+  uint32_t qnn_dlc_graph_info_num_ = 0;
+
+  Error LoadQnnIrLibrary();
+  Error Create();
+  Error Configure();
+};
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h
index 86de76f0d99..12a6be36b64 100644
--- a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h
+++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h
@@ -101,6 +101,9 @@ class QnnInterface {
   bool IsLoaded() const {
     return qnn_interface_ != nullptr;
   }
+  const QNN_INTERFACE_VER_TYPE& GetInterfaceVer() const {
+    return qnn_interface_->QNN_INTERFACE_VER_NAME; // requires IsLoaded()
+  }
 
  private:
   // --------- QnnInterface ---------
diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp
index b4614ac2d3d..9fe81f4cf54 100644
--- a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp
+++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp
@@ -16,7 +16,6 @@ Error QnnGraph::Configure(const std::string& graph_name) {
   // create qnn backend
   const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
   Qnn_ErrorHandle_t error = QNN_SUCCESS;
-
   std::vector<const QnnGraph_Config_t*> temp_graph_config;
   ET_CHECK_OR_RETURN_ERROR(
       MakeConfig(temp_graph_config) == Error::Ok,
@@ -44,8 +43,8 @@ Error QnnGraph::Configure(const std::string& graph_name) {
     }
   } else if (
       context_->GetCacheState() == QnnBackendCache::SERIALIZE ||
-      context_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) {
-    Qnn_ErrorHandle_t error = qnn_interface.qnn_graph_create(
+      context_->GetCacheState() == QnnBackendCache::MULTI_GRAPH) {
+    error = qnn_interface.qnn_graph_create(
         context_->GetHandle(),
         graph_name.c_str(),
         temp_graph_config.empty() ? nullptr : temp_graph_config.data(),
@@ -56,6 +55,9 @@ Error QnnGraph::Configure(const std::string& graph_name) {
           "qnn_graph_create failed. Error  %d", QNN_GET_ERROR_CODE(error));
       return Error::Internal;
     }
+  } else if (context_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) {
+    QNN_EXECUTORCH_LOG_INFO(
+        "Skip qnn_graph_create, graph has already been composed from Dlc.");
   } else {
     QNN_EXECUTORCH_LOG_ERROR("QNN context cache is invalid.");
     return Error::Internal;
diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.h b/backends/qualcomm/runtime/backends/QnnGraphCommon.h
index 62d9b1b9e1a..33f903dae41 100644
--- a/backends/qualcomm/runtime/backends/QnnGraphCommon.h
+++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.h
@@ -66,6 +66,12 @@ class QnnGraph {
     return handle_[graph_name];
   }
 
+  void SetGraphHandle(
+      const std::string& graph_name,
+      Qnn_GraphHandle_t graph_handle) {
+    handle_[graph_name] = graph_handle; // stores the handle under graph_name
+  }
+
   QnnProfile* GetProfile(const std::string& graph_name) {
     return profile_[graph_name].get();
   }
diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp
index 6baf4cbb411..7add5e744f9 100644
--- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp
+++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp
@@ -5,7 +5,6 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include <dlfcn.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
 
 #include "QnnInterface.h"
@@ -15,11 +14,6 @@ namespace qnn {
 
 using executorch::runtime::Error;
 
-template <typename Fn>
-Fn loadQnnFunction(void* handle, const char* function_name) {
-  return reinterpret_cast<Fn>(dlsym(handle, function_name)); // NOLINT
-}
-
 Error QnnImplementation::InitBackend(
     void* const lib_handle,
     const QnnSaver_Config_t** saver_config) {
diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.h b/backends/qualcomm/runtime/backends/QnnImplementation.h
index 61ee6222e91..a49ee6516fc 100644
--- a/backends/qualcomm/runtime/backends/QnnImplementation.h
+++ b/backends/qualcomm/runtime/backends/QnnImplementation.h
@@ -10,12 +10,19 @@
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnFunctionInterface.h>
 
+#include <dlfcn.h>
 #include <mutex>
 #include <string>
 #include <unordered_map>
 namespace executorch {
 namespace backends {
 namespace qnn {
+
+template <typename Fn> // Fn: pointer type the looked-up symbol is cast to
+Fn loadQnnFunction(void* handle, const char* function_name) {
+  return reinterpret_cast<Fn>(dlsym(handle, function_name)); // NOLINT
+}
+
 class QnnImplementation {
  public:
   using BackendIdType = decltype(QnnInterface_t{}.backendId);
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp
index 28d0bbf8055..50d299b55e9 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp
+++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp
@@ -10,7 +10,6 @@
 #include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
 
 #include "HTP/QnnHtpCommon.h"
-#include "Saver/QnnSaverCommon.h"
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
index 131cf856ce1..88660db080a 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
+++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
@@ -16,6 +16,7 @@ namespace executorch {
 namespace backends {
 namespace qnn {
 
+class QnnDlcManager;
 class HtpContext : public QnnContext {
  public:
   HtpContext(
@@ -23,8 +24,9 @@ class HtpContext : public QnnContext {
       QnnBackend* backend,
       QnnDevice* device,
       QnnBackendCache* cache,
-      const QnnExecuTorchHtpBackendOptions* htp_options)
-      : QnnContext(implementation, backend, device, cache) {
+      const QnnExecuTorchHtpBackendOptions* htp_options,
+      QnnDlcManager* qnn_dlc_manager)
+      : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) {
     htp_context_custom_config_ =
         std::make_unique<HtpContextCustomConfig>(this, htp_options);
   }
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h
new file mode 100644
index 00000000000..ddeb3a24460
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
+#if (QNN_API_VERSION_MAJOR > 2) || \
+    (QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 23)
+#include "IR/QnnIrCommon.h"
+#endif
+#include "QnnTypes.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+class IrBackend : public QnnBackend {
+ public:
+  IrBackend(const QnnImplementation& implementation, QnnLogger* logger)
+      : QnnBackend(implementation, logger) {}
+  ~IrBackend() {}
+  // IR API version from the SDK headers; QNN_VERSION_INIT before API 2.23.
+  Qnn_Version_t GetExpectedBackendVersion() const override {
+    Qnn_Version_t backend_version = QNN_VERSION_INIT;
+#if (QNN_API_VERSION_MAJOR > 2) || \
+    (QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 23)
+    backend_version.major = QNN_IR_API_VERSION_MAJOR;
+    backend_version.minor = QNN_IR_API_VERSION_MINOR;
+    backend_version.patch = QNN_IR_API_VERSION_PATCH;
+#endif
+    return backend_version;
+  }
+};
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrContext.h b/backends/qualcomm/runtime/backends/irbackend/IrContext.h
new file mode 100644
index 00000000000..824d41bb7ff
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/IrContext.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+class IrContext : public QnnContext {
+ public:
+  using QnnContext::QnnContext; // reuse the QnnContext constructors
+  // Defined per architecture under irbackend/aarch64 and irbackend/x86_64.
+  executorch::runtime::Error GetContextBinary(
+      QnnExecuTorchContextBinary& qnn_executorch_context_binary) override;
+
+ private:
+  std::vector<char> buffer_; // backing storage filled by the x86_64 override
+};
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp
new file mode 100644
index 00000000000..44ce8de8f46
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <fstream>
+
+#include <executorch/backends/qualcomm/runtime/Logging.h>
+#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+Error IrContext::GetContextBinary(
+    QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
+  return Error::Ok; // stub: succeeds without touching the output argument
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
new file mode 100644
index 00000000000..57d0b9170bc
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fstream>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+QnnDlcManager::QnnDlcManager(
+    const QnnExecuTorchContextBinary& qnn_context_blob,
+    const QnnExecuTorchOptions* options)
+    : qnn_loaded_backend_(""), // empty placeholder library name
+      qnn_context_blob_(qnn_context_blob),
+      options_(options) {
+  QNN_EXECUTORCH_LOG_INFO( // NOTE(review): confirm %u matches nbytes' type
+      "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes);
+  // A null options pointer is only reported; construction still completes.
+  if (options_ == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Fail to create QnnDlcManager, options is nullptr");
+  }
+}
+
+Error QnnDlcManager::LoadQnnIrLibrary() {
+  return Error::Ok; // no-op in the aarch64 build
+}
+
+Error QnnDlcManager::Create() {
+  return Error::Ok; // no-op in the aarch64 build
+}
+
+Error QnnDlcManager::Configure() {
+  return Error::Ok; // no-op in the aarch64 build
+}
+
+Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) {
+  return Error::Ok; // no-op in the aarch64 build; coreApiVersion is unused
+}
+
+// Composes the graphs of the cached DLC blob into `context`. The blob is
+// copied into an in-memory file so the DLC library can open it by path; the
+// library handle, mapping and descriptor are released on every exit path.
+Error QnnDlcManager::RegisterGraphsFromDLC(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnContext* context,
+    QnnBackendCache* cache) {
+  void* lib_handle = dlopen(dlc_lib_, RTLD_NOW | RTLD_LOCAL);
+  if (lib_handle == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Cannot Open lib %s, with error: %s", dlc_lib_, dlerror());
+    return Error::Internal;
+  }
+  const QnnExecuTorchContextBinary& qnn_context_blob =
+      cache->GetQnnContextBlob();
+  int fd = -1;
+  void* addr = MAP_FAILED;
+  // Single cleanup routine shared by the success path and all error paths.
+  auto release_resources = [&]() {
+    if (addr != MAP_FAILED) {
+      munmap(addr, qnn_context_blob.nbytes);
+    }
+    if (fd != -1) {
+      close(fd);
+    }
+    dlclose(lib_handle);
+  };
+  QnnModel_composeGraphsFromDlc composeGraphsFromDlc =
+      loadQnnFunction<QnnModel_composeGraphsFromDlc>(
+          lib_handle, "QnnModel_composeGraphsFromDlc");
+  if (composeGraphsFromDlc == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Cannot load symbol QnnModel_composeGraphsFromDlc : %s", dlerror());
+    release_resources();
+    return Error::Internal;
+  }
+  // memfd_create is only compiled in for Android API >= 30; else fd stays -1.
+#if defined(__ANDROID__) && __ANDROID_API__ >= 30
+  fd = memfd_create("tmp.dlc", 0);
+#endif
+  if (fd == -1) {
+    QNN_EXECUTORCH_LOG_ERROR("memfd_create fail");
+    release_resources();
+    return Error::Internal;
+  }
+  if (ftruncate(fd, qnn_context_blob.nbytes) == -1) {
+    QNN_EXECUTORCH_LOG_ERROR("ftruncate fail");
+    release_resources();
+    return Error::Internal;
+  }
+  addr = mmap(
+      NULL, qnn_context_blob.nbytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (addr == MAP_FAILED) {
+    QNN_EXECUTORCH_LOG_ERROR("mmap");
+    release_resources();
+    return Error::Internal;
+  }
+  memcpy(addr, qnn_context_blob.buffer, qnn_context_blob.nbytes);
+
+  char dlc_path[256];
+  snprintf(dlc_path, sizeof(dlc_path), "/proc/self/fd/%d", fd);
+  const auto model_status = composeGraphsFromDlc(
+      /*backendHandle=*/backend->GetHandle(),
+      /*interface=*/implementation.GetQnnInterface().GetInterfaceVer(),
+      /*contextHandle=*/context->GetHandle(),
+      /*graphsConfigInfo=*/nullptr,
+      /*dlcPath=*/dlc_path,
+      /*numGraphsConfigInfo=*/0,
+      /*graphsInfo=*/&qnn_dlc_graph_info_,
+      /*numGraphsInfo=*/&qnn_dlc_graph_info_num_,
+      /*debug=*/false,
+      /*logCallback=*/nullptr,
+      /*maxLogLevel=*/QNN_LOG_LEVEL_VERBOSE);
+  release_resources();
+  if (model_status != qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR) {
+    QNN_EXECUTORCH_LOG_ERROR("Failed to open Dlc");
+    return Error::Internal;
+  }
+
+  for (uint32_t i = 0; i < qnn_dlc_graph_info_num_; ++i) {
+    auto& graphInfo = (*qnn_dlc_graph_info_)[i];
+    cache->SetGraphNames(graphInfo.graphName);
+  }
+  return Error::Ok;
+}
+
+void QnnDlcManager::ResetBackendParams() {} // no-op in the aarch64 build
+void QnnDlcManager::ResetLogger() {} // no-op in the aarch64 build
+void QnnDlcManager::TerminateAllBackends() {} // no-op in the aarch64 build
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp
new file mode 100644
index 00000000000..f167aae9319
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <fstream>
+
+#include <executorch/backends/qualcomm/runtime/Logging.h>
+#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+// Reads "<first graph name>.dlc" (a relative path) into buffer_ and points
+// qnn_executorch_context_binary at that storage. Returns Error::Internal
+// when no graph name exists or the file cannot be opened, sized or read.
+Error IrContext::GetContextBinary(
+    QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
+  const auto& graph_names = GetGraphNames();
+  if (graph_names.empty()) {
+    QNN_EXECUTORCH_LOG_ERROR("No graph name available to locate dlc file");
+    return Error::Internal;
+  }
+  const std::string dlc_name = graph_names[0] + ".dlc";
+
+  // Opening at the end (std::ios::ate) lets tellg() report the file size.
+  std::ifstream dlc_file(dlc_name, std::ios::binary | std::ios::ate);
+  if (!dlc_file.is_open()) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Unable to open dlc file %s for building QnnExecuTorchContextBinary",
+        dlc_name.c_str());
+    return Error::Internal;
+  }
+  const std::streamsize size = dlc_file.tellg();
+  if (size < 0) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Unable to get size of dlc file %s", dlc_name.c_str());
+    return Error::Internal;
+  }
+
+  buffer_.resize(static_cast<size_t>(size));
+  dlc_file.seekg(0, std::ios::beg);
+  if (!dlc_file.read(buffer_.data(), size)) {
+    QNN_EXECUTORCH_LOG_ERROR("Unable to read dlc file %s", dlc_name.c_str());
+    return Error::Internal;
+  }
+  qnn_executorch_context_binary.buffer = buffer_.data();
+  qnn_executorch_context_binary.nbytes = size;
+  return Error::Ok;
+}
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
new file mode 100644
index 00000000000..14b9aeadf3a
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
+#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrBackend.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+QnnDlcManager::QnnDlcManager(
+    const QnnExecuTorchContextBinary& qnn_context_blob,
+    const QnnExecuTorchOptions* options)
+    : qnn_loaded_backend_(""), // placeholder, replaced in LoadQnnIrLibrary()
+      qnn_context_blob_(qnn_context_blob),
+      options_(options) {
+  QNN_EXECUTORCH_LOG_INFO( // NOTE(review): confirm %u matches nbytes' type
+      "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes);
+  // A null options pointer is only reported; construction still completes.
+  if (options_ == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Fail to create QnnDlcManager, options is nullptr");
+  }
+}
+
+// Replaces the placeholder implementation with libQnnIr.so and loads it.
+Error QnnDlcManager::LoadQnnIrLibrary() {
+  qnn_loaded_backend_ = QnnImplementation(library_name_);
+  return qnn_loaded_backend_.Load(nullptr);
+}
+
+// Instantiates the IR backend, device, cache, context and graph objects and
+// returns the result of the backend's SDK version verification.
+Error QnnDlcManager::Create() {
+  auto& params = *backend_params_ptr_;
+  QnnLogger* const logger = logger_.get();
+  params.qnn_backend_ptr_ =
+      std::make_unique<IrBackend>(qnn_loaded_backend_, logger);
+  params.qnn_device_ptr_ =
+      std::make_unique<QnnDevice>(qnn_loaded_backend_, logger);
+  params.qnn_backend_cache_ptr_ = std::make_unique<QnnBackendCache>(
+      qnn_context_blob_, options_->graph_name()->str());
+
+  // The IR context itself gets no DLC manager (last argument).
+  params.qnn_context_ptr_ = std::make_unique<IrContext>(
+      qnn_loaded_backend_,
+      params.qnn_backend_ptr_.get(),
+      params.qnn_device_ptr_.get(),
+      params.qnn_backend_cache_ptr_.get(),
+      nullptr);
+  params.qnn_graph_ptr_ = std::make_unique<QnnGraph>(
+      qnn_loaded_backend_,
+      params.qnn_backend_ptr_.get(),
+      params.qnn_context_ptr_.get(),
+      options_->profile_level());
+  params.backend_init_state_ = BackendInitializeState::INITIALIZED;
+  return params.qnn_backend_ptr_->VerifyQNNSDKVersion();
+}
+
+// Configures cache, backend, context and then every graph of the IR backend;
+// each step is guarded by ET_CHECK_OR_RETURN_ERROR with the Internal code.
+Error QnnDlcManager::Configure() {
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.");
+  auto& params = *backend_params_ptr_;
+  ET_CHECK_OR_RETURN_ERROR(
+      params.qnn_backend_cache_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn backend cache");
+  ET_CHECK_OR_RETURN_ERROR(
+      params.qnn_backend_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn backend");
+  ET_CHECK_OR_RETURN_ERROR(
+      params.qnn_context_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn context");
+  for (const std::string& name : params.qnn_context_ptr_->GetGraphNames()) {
+    ET_CHECK_OR_RETURN_ERROR(
+        params.qnn_graph_ptr_->Configure(name) == Error::Ok,
+        Internal,
+        "Fail to configure Qnn graph");
+  }
+  params.backend_init_state_ = BackendInitializeState::INITIALIZED;
+  return Error::Ok;
+}
+
+// Loads, creates and configures the IR backend. Requires core API >= 2.23,
+// compared as a (major, minor) pair, and a non-null options pointer.
+Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) {
+  ET_CHECK_MSG(
+      (coreApiVersion.major > 2 ||
+       (coreApiVersion.major == 2 && coreApiVersion.minor >= 23)),
+      "Qnn API version %u.%u.%u is not supported for Qnn IR backend, The minimum supported version is 2.23.0 or QNN_SDK version 2.30.0",
+      coreApiVersion.major,
+      coreApiVersion.minor,
+      coreApiVersion.patch);
+  ET_CHECK_OR_RETURN_ERROR(
+      options_ != nullptr, Internal, "QnnExecuTorchOptions is nullptr.");
+  ET_CHECK_OR_RETURN_ERROR(
+      LoadQnnIrLibrary() == Error::Ok,
+      Internal,
+      "Fail to Load Qnn IR library.");
+  logger_ = std::make_unique<QnnLogger>(
+      qnn_loaded_backend_, LoggingCallback, options_->log_level());
+  ET_CHECK_OR_RETURN_ERROR(
+      Create() == Error::Ok, Internal, "Failed to load Qnn IR backend.");
+  ET_CHECK_OR_RETURN_ERROR(
+      Configure() == Error::Ok, Internal, "Fail to configure IR backend.");
+  return Error::Ok;
+}
+
+Error QnnDlcManager::RegisterGraphsFromDLC(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnContext* context,
+    QnnBackendCache* cache) {
+  return Error::Ok; // no-op in the x86_64 build; all arguments are unused
+}
+
+void QnnDlcManager::ResetBackendParams() { // swaps in a fresh parameter set
+  backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
+}
+
+void QnnDlcManager::ResetLogger() {
+  logger_.reset(); // destroys the logger, if any
+}
+
+void QnnDlcManager::TerminateAllBackends() {
+  qnn_loaded_backend_.TerminateAllBackends(); // forwards to the IR library
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index fef177fd300..c079dd41a2a 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -86,8 +86,8 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI='arm64-v8a' \
-        -DANDROID_NATIVE_API_LEVEL=23 \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+        -DANDROID_PLATFORM=android-30 \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
         -B$BUILD_ROOT
 
@@ -100,7 +100,7 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
         -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
         -DANDROID_ABI='arm64-v8a' \
-        -DANDROID_NATIVE_API_LEVEL=23 \
+        -DANDROID_PLATFORM=android-30 \
         -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs
index 3e78ba23ac8..d8809231a9f 100644
--- a/backends/qualcomm/serialization/qc_compiler_spec.fbs
+++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs
@@ -194,6 +194,12 @@ table QnnExecuTorchOptions {
 
   /// True if there exists multiple graphs in one .pte file.
   multiple_graphs:bool;
+
+  /// Enable this option to record all QNN API calls for debugging purposes.
+  saver:bool;
+
+  /// Path to the saver output folder.
+  saver_output_dir:string;
 }
 
 root_type QnnExecuTorchOptions;
diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py
index 56ba27bb000..93305b1dbb5 100644
--- a/backends/qualcomm/serialization/qc_schema.py
+++ b/backends/qualcomm/serialization/qc_schema.py
@@ -157,3 +157,5 @@ class QnnExecuTorchOptions:
     shared_buffer: bool = False
     is_from_context_binary: bool = False
     multiple_graphs: bool = False
+    saver: bool = False
+    saver_output_dir: str = "saver_output"
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 7d097fd45bf..338209fcd4a 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -25,10 +25,10 @@
 from executorch.backends.qualcomm.tests.utils import (
     generate_context_binary,
     ModuleQConfig,
+    QnnTool,
     QuantDtype,
     TestQNN,
     validate_context_binary,
-    validate_qcir,
 )
 from executorch.backends.qualcomm.utils.constants import (
     QCOM_ANNOTATION,
@@ -2563,15 +2563,30 @@ def test_qnn_backend_context_extraction(self):
         module = SimpleModel()  # noqa: F405
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
         backend_options = generate_htp_compiler_spec(use_fp16=True)
+
+        # Validate dlc
+        compiler_spec = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+            online_prepare=True,
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
+                module, sample_input, compiler_spec
+            ).to_executorch()
+            pte_path = f"{tmp_dir}/model.pte"
+            with open(pte_path, "wb") as f:
+                edge_prog_mgr.write_to_file(f)
+            dump_context_from_pte(pte_path)
+
+            qnn_tool = QnnTool(tmp_dir, pte_path, sample_input)
+            qnn_tool.qnn_context_binary_generator()
+            qnn_tool.qnn_net_run()
+
         compiler_specs = [
             self.compiler_specs,
-            generate_qnn_executorch_compiler_spec(
-                soc_model=self.chipset_table[TestQNN.model],
-                backend_options=backend_options,
-                online_prepare=True,
-            ),
         ]
-        validators = [validate_context_binary, validate_qcir]
+        validators = [validate_context_binary]
 
         for compiler_spec, validate in zip(compiler_specs, validators):
             edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
@@ -2591,15 +2606,30 @@ def test_qnn_backend_dump_context_from_pte(self):
         module = SimpleModel()  # noqa: F405
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
         backend_options = generate_htp_compiler_spec(use_fp16=True)
+
+        # Validate dlc
+        compiler_spec = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+            online_prepare=True,
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
+                module, sample_input, compiler_spec
+            ).to_executorch()
+            pte_path = f"{tmp_dir}/model.pte"
+            with open(pte_path, "wb") as f:
+                edge_prog_mgr.write_to_file(f)
+            dump_context_from_pte(pte_path)
+
+            qnn_tool = QnnTool(tmp_dir, pte_path, sample_input)
+            qnn_tool.qnn_context_binary_generator()
+            qnn_tool.qnn_net_run()
+
         compiler_specs = [
             self.compiler_specs,
-            generate_qnn_executorch_compiler_spec(
-                soc_model=self.chipset_table[TestQNN.model],
-                backend_options=backend_options,
-                online_prepare=True,
-            ),
         ]
-        validators = [validate_context_binary, validate_qcir]
+        validators = [validate_context_binary]
 
         for compiler_spec, validate in zip(compiler_specs, validators):
             edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
@@ -3181,15 +3211,30 @@ def test_qnn_backend_context_extraction(self):
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
         module = self.get_qdq_module(module, sample_input)
         backend_options = generate_htp_compiler_spec(use_fp16=False)
+
+        # Validate dlc
+        compiler_spec = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+            online_prepare=True,
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
+                module, sample_input, compiler_spec
+            ).to_executorch()
+            pte_path = f"{tmp_dir}/model.pte"
+            with open(pte_path, "wb") as f:
+                edge_prog_mgr.write_to_file(f)
+            dump_context_from_pte(pte_path)
+
+            qnn_tool = QnnTool(tmp_dir, pte_path, sample_input)
+            qnn_tool.qnn_context_binary_generator()
+            qnn_tool.qnn_net_run()
+
         compiler_specs = [
             self.compiler_specs,
-            generate_qnn_executorch_compiler_spec(
-                soc_model=self.chipset_table[TestQNN.model],
-                backend_options=backend_options,
-                online_prepare=True,
-            ),
         ]
-        validators = [validate_context_binary, validate_qcir]
+        validators = [validate_context_binary]
 
         for compiler_spec, validate in zip(compiler_specs, validators):
             edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
@@ -3210,15 +3255,30 @@ def test_qnn_backend_dump_context_from_pte(self):
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
         module = self.get_qdq_module(module, sample_input)
         backend_options = generate_htp_compiler_spec(use_fp16=True)
+
+        # Validate dlc
+        compiler_spec = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+            online_prepare=True,
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
+                module, sample_input, compiler_spec
+            ).to_executorch()
+            pte_path = f"{tmp_dir}/model.pte"
+            with open(pte_path, "wb") as f:
+                edge_prog_mgr.write_to_file(f)
+            dump_context_from_pte(pte_path)
+
+            qnn_tool = QnnTool(tmp_dir, pte_path, sample_input)
+            qnn_tool.qnn_context_binary_generator()
+            qnn_tool.qnn_net_run()
+
         compiler_specs = [
             self.compiler_specs,
-            generate_qnn_executorch_compiler_spec(
-                soc_model=self.chipset_table[TestQNN.model],
-                backend_options=backend_options,
-                online_prepare=True,
-            ),
         ]
-        validators = [validate_context_binary, validate_qcir]
+        validators = [validate_context_binary]
 
         for compiler_spec, validate in zip(compiler_specs, validators):
             edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
@@ -4679,12 +4739,6 @@ def setup_environment():
         help="Input the model to export",
         type=str,
     )
-    parser.add_argument(
-        "-o",
-        "--online_prepare",
-        help="Conduct on-device graph compilation",
-        action="store_true",
-    )
     parser.add_argument(
         "-P",
         "--enable_profile",
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index 71d3b9e7ec2..695c846de05 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 import collections
 import copy
+import json
 import os
 import subprocess
 import tempfile
@@ -13,7 +14,6 @@
 
 import numpy as np
 import torch
-
 from executorch import exir
 from executorch.backends.qualcomm.qnn_preprocess import QnnBackend
 from executorch.backends.qualcomm.quantizer.quantizer import ModuleQConfig, QuantDtype
@@ -636,3 +636,151 @@ def call(self, graph_module: torch.fx.GraphModule):
             QCOM_PASS_ACTIVATE_KEY: True,
             QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY: {"division": division},
         }
+
+
+class QnnTool(TestQNN):
+    """
+    Thin wrapper around the QNN SDK command-line tools used by the tests.
+
+    Settings such as build_folder, device, host, model, error_only and
+    enable_x86_64 are read from attributes inherited from TestQNN.
+    """
+
+    def __init__(
+        self,
+        tmp_dir,
+        pte_fname,
+        sample_input,
+        workspace="/data/local/tmp/qnn_executorch_test",
+    ):
+        """
+        Args:
+            tmp_dir: Host directory that holds the .dlc file and receives the
+                generated context binary and JSON config files.
+            pte_fname: Path of the .pte file, forwarded to SimpleADB.
+            sample_input: Inputs pushed through SimpleADB for qnn-net-run.
+            workspace: Directory given to SimpleADB as workspace; qnn-net-run
+                is launched from it.
+        """
+        self.qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
+        self.ndk = os.environ.get("ANDROID_NDK_ROOT", None)
+        assert self.qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"
+        assert self.ndk, "ANDROID_NDK_ROOT was not found in environment"
+
+        self.tmp_dir = tmp_dir
+        self.workspace = workspace
+        self.adb = SimpleADB(
+            qnn_sdk=self.qnn_sdk,
+            build_path=self.build_folder,
+            pte_path=pte_fname,
+            workspace=self.workspace,
+            device_id=self.device,
+            host_id=self.host,
+            soc_model=self.model,
+            error_only=self.error_only,
+        )
+        self.sample_input = sample_input
+
+    def qnn_context_binary_generator(
+        self, dlc_name="forward_0.dlc", binary_name="forward.serialized"
+    ):
+        """
+        Run qnn-context-binary-generator on the host against
+        "<tmp_dir>/<dlc_name>" and check that "<tmp_dir>/<binary_name>.bin"
+        exists afterwards.
+
+        Raises:
+            AssertionError: If the expected .bin file is missing; the message
+                carries the tool's stdout and stderr.
+        """
+        cmds = [
+            f"{self.qnn_sdk}/bin/x86_64-linux-clang/qnn-context-binary-generator",
+            "--backend",
+            f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnHtp.so",
+            "--model",
+            f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnModelDlc.so",
+            "--dlc_path",
+            f"{self.tmp_dir}/{dlc_name}",
+            "--binary_file",
+            f"{self.tmp_dir}/{binary_name}",
+        ]
+        result = subprocess.run(
+            " ".join(cmds),
+            shell=True,
+            executable="/bin/bash",
+            capture_output=True,
+        )
+        binary_path = f"{self.tmp_dir}/{binary_name}.bin"
+        # Carry the tool output in the assertion message so a failure can be
+        # diagnosed from the AssertionError itself.
+        assert os.path.isfile(binary_path), (
+            f"{binary_path} was not generated.\n"
+            f"stdout: {result.stdout.decode(errors='replace')}\n"
+            f"stderr: {result.stderr.decode(errors='replace')}"
+        )
+
+    def qnn_net_run(self, binary_name="forward.serialized"):
+        """
+        Push "<tmp_dir>/<binary_name>.bin", qnn-net-run and its config files
+        through SimpleADB, then execute qnn-net-run with that context binary.
+
+        Returns immediately when enable_x86_64 is set.
+        """
+        if self.enable_x86_64:
+            # TODO: Implement context binary consumption on x86_64 platform
+            return
+
+        # Single line naming one raw file per entry of sample_input.
+        input_list = (
+            " ".join(f"input_{idx}_0.raw" for idx, _ in enumerate(self.sample_input))
+            + "\n"
+        )
+
+        # Config for qnn-net-run. Each top-level key is written to
+        # "<tmp_dir>/<key>.json"; backend_extension_config.json refers to
+        # config.json through "config_file_path".
+        config = {
+            "backend_extension_config": {
+                "backend_extensions": {
+                    "shared_library_path": "./libQnnHtpNetRunExtensions.so",
+                    "config_file_path": "config.json",
+                }
+            },
+            "config": {
+                "devices": [
+                    {
+                        "profiling_level": "linting",
+                        "cores": [
+                            {"perf_profile": "burst", "rpc_control_latency": 100}
+                        ],
+                    }
+                ]
+            },
+        }
+
+        for file_name, data in config.items():
+            with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file:
+                json.dump(data, json_file, indent=4)
+
+        files = [
+            f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpNetRunExtensions.so",
+            f"{self.tmp_dir}/backend_extension_config.json",
+            f"{self.tmp_dir}/config.json",
+            f"{self.tmp_dir}/{binary_name}.bin",
+            f"{self.qnn_sdk}/bin/aarch64-android/qnn-net-run",
+        ]
+        cmds = [
+            f"export LD_LIBRARY_PATH={self.workspace} &&",
+            f"export ADSP_LIBRARY_PATH={self.workspace} &&",
+            f"cd {self.workspace} &&",
+            "./qnn-net-run",
+            "--backend libQnnHtp.so",
+            "--input_list input_list.txt",
+            f"--retrieve_context {binary_name}.bin",
+            "--use_native_input_files",
+            "--use_native_output_files",
+            "--config_file backend_extension_config.json",
+            "--profiling_level backend",
+        ]
+        self.adb.push(inputs=self.sample_input, input_list=input_list, files=files)
+        self.adb.execute(custom_runner_cmd=" ".join(cmds))
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index e0ebc5beebe..3653cd3176f 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -221,7 +221,13 @@ def dump_context_from_pte(pte_path):
                     delegate.processed.index
                 ].data
                 binary = qnn_mgr.StripProtocol(processed_bytes)
-                with open(f"{ctx_path}/{execution_plan.name}_{i}.bin", "wb") as f:
+                file_extension = ".bin"
+                if len(binary) == 0:
+                    binary = processed_bytes
+                    file_extension = ".dlc"
+                with open(
+                    f"{ctx_path}/{execution_plan.name}_{i}{file_extension}", "wb"
+                ) as f:
                     f.write(binary)
 
 
@@ -1181,6 +1187,8 @@ def generate_qnn_executorch_compiler_spec(
 
     if saver:
         qnn_executorch_options.library_path = "libQnnSaver.so"
+        qnn_executorch_options.saver = True
+        qnn_executorch_options.saver_output_dir = "saver_output"
 
     if optrace:
         qnn_executorch_options.profile_level = QnnExecuTorchProfileLevel.kProfileOptrace
diff --git a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md
index 422ad26c49c..3723fc9b454 100644
--- a/docs/source/backends-qualcomm.md
+++ b/docs/source/backends-qualcomm.md
@@ -182,7 +182,7 @@ cmake .. \
     -DPYTHON_EXECUTABLE=python3 \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI='arm64-v8a' \
-    -DANDROID_NATIVE_API_LEVEL=23
+    -DANDROID_PLATFORM=android-30
 
 # nproc is used to detect the number of available CPU.
 # If it is not applicable, please feel free to use the number you want.
@@ -191,7 +191,7 @@ cmake --build $PWD --target install -j$(nproc)
 cmake ../examples/qualcomm \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI='arm64-v8a' \
-    -DANDROID_NATIVE_API_LEVEL=23 \
+    -DANDROID_PLATFORM=android-30 \
     -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \
     -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
     -DPYTHON_EXECUTABLE=python3 \
diff --git a/examples/qualcomm/test_qualcomm.sh b/examples/qualcomm/test_qualcomm.sh
index d95f53f99cb..19d3d798418 100644
--- a/examples/qualcomm/test_qualcomm.sh
+++ b/examples/qualcomm/test_qualcomm.sh
@@ -38,7 +38,7 @@ test_cmake_qualcomm() {
         -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI='arm64-v8a' \
-        -DANDROID_NATIVE_API_LEVEL=23 \
+        -DANDROID_NATIVE_API_LEVEL=30 \
         -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
         -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
         -B${build_dir} \
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index 242170712e1..542739a2898 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -129,6 +129,7 @@ def push(self, inputs=None, input_list=None, files=None):
             f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so",
             f"{self.build_path}/{self.runner}",
             f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so",
+            f"{self.qnn_sdk}/lib/aarch64-android/libQnnModelDlc.so",
         ]
         input_list_file, input_files = generate_inputs(
             self.working_dir, self.input_list_filename, inputs, input_list
@@ -294,6 +295,7 @@ def build_executorch_binary(
     dump_intermediate_outputs=False,
     passes_job=None,
     qat_training_data=None,
+    online_prepare=False,
 ):
     """
     A function to generate an ExecuTorch binary for Qualcomm platforms.
@@ -311,7 +313,9 @@ def build_executorch_binary(
         shared_buffer (bool, optional): Applies zero-copy mechanism to optimize runtime memory allocation.
         metadata (dict, optional): An optional dictionary that maps each method name to a constant value in eager mode.
         dump_intermediate_outputs (bool, optional): Enables dumping model intermediate outputs.
-        custom_pass_config (frozenset, optional): Set of custom passes for model processing.
+        passes_job (OrderedDict, optional): Custom passes job in capture_program. Users can enable/disable specific passes or modify their attributes.
+        qat_training_data (List[torch.Tensor], optional): A dataset for quantization-aware training (QAT), typically a pair of tensors such as [features, ground truth].
+        online_prepare (bool, optional): Compose QNN graph on device if set to True.
 
     Returns:
         None: The function writes the output to a specified .pte file.
@@ -322,6 +326,7 @@ def build_executorch_binary(
     compile_spec = generate_qnn_executorch_compiler_spec(
         soc_model=getattr(QcomChipset, soc_model),
         backend_options=backend_options,
+        online_prepare=online_prepare,
         shared_buffer=shared_buffer,
         dump_intermediate_outputs=dump_intermediate_outputs,
     )
@@ -500,6 +505,13 @@ def setup_common_args_and_variables():
         type=str,
     )
 
+    parser.add_argument(
+        "--online_prepare",
+        help="If specified, compose QNN graph on device.",
+        action="store_true",
+        default=False,
+    )
+
     parser.add_argument(
         "--ip",
         help="IPC address for delivering execution result",