From 8ffca64ba372bc3cd51be4e952fb236d76a17cd2 Mon Sep 17 00:00:00 2001 From: DannyYuyang-quic Date: Mon, 10 Feb 2025 16:50:21 +0800 Subject: [PATCH] Qualcomm AI Engine Direct - Support Qnn IR backend in online preparation - Support Qnn IR backend - Replace QCir with Dlc in online prepare flow - Add config for Saver backend - Block online preparation if the QNN version is below 2.30. - Fix SDK version checking - quant/dequant op breakage fix - Upgrade ANDROID_NATIVE_API_LEVEL from 23 to 30 - Add comments for qat_training_data/passes_job --- .ci/scripts/build-qnn-sdk.sh | 1 + backends/qualcomm/CMakeLists.txt | 7 +- .../qualcomm/aot/python/PyQnnManagerAdaptor.h | 9 +- backends/qualcomm/builders/op_dequantize.py | 2 +- backends/qualcomm/builders/op_quantize.py | 2 +- backends/qualcomm/qnn_preprocess.py | 9 ++ .../qualcomm/runtime/QnnExecuTorchBackend.cpp | 6 +- backends/qualcomm/runtime/QnnManager.cpp | 118 ++++++++++++--- backends/qualcomm/runtime/QnnManager.h | 22 ++- .../qualcomm/runtime/backends/CMakeLists.txt | 10 ++ .../runtime/backends/QnnBackendCache.cpp | 12 +- .../runtime/backends/QnnBackendCache.h | 5 + .../runtime/backends/QnnBackendCommon.cpp | 15 +- .../runtime/backends/QnnBackendCommon.h | 5 +- .../runtime/backends/QnnBackendFactory.cpp | 10 +- .../runtime/backends/QnnBackendFactory.h | 5 +- .../runtime/backends/QnnContextCommon.cpp | 18 ++- .../runtime/backends/QnnContextCommon.h | 14 +- .../qualcomm/runtime/backends/QnnDlcManager.h | 71 +++++++++ .../runtime/backends/QnnFunctionInterface.h | 3 + .../runtime/backends/QnnGraphCommon.cpp | 8 +- .../runtime/backends/QnnGraphCommon.h | 6 + .../runtime/backends/QnnImplementation.cpp | 6 - .../runtime/backends/QnnImplementation.h | 7 + .../backends/htpbackend/HtpContext.cpp | 1 - .../runtime/backends/htpbackend/HtpContext.h | 6 +- .../runtime/backends/irbackend/IrBackend.h | 39 +++++ .../runtime/backends/irbackend/IrContext.h | 28 ++++ .../backends/irbackend/aarch64/IrContext.cpp | 27 ++++ 
.../irbackend/aarch64/QnnDlcManager.cpp | 143 ++++++++++++++++++ .../backends/irbackend/x86_64/IrContext.cpp | 43 ++++++ .../irbackend/x86_64/QnnDlcManager.cpp | 139 +++++++++++++++++ backends/qualcomm/scripts/build.sh | 4 +- .../serialization/qc_compiler_spec.fbs | 6 + backends/qualcomm/serialization/qc_schema.py | 2 + backends/qualcomm/tests/test_qnn_delegate.py | 116 ++++++++++---- backends/qualcomm/tests/utils.py | 110 +++++++++++++- backends/qualcomm/utils/utils.py | 10 +- docs/source/backends-qualcomm.md | 4 +- examples/qualcomm/test_qualcomm.sh | 2 +- examples/qualcomm/utils.py | 14 +- 41 files changed, 954 insertions(+), 111 deletions(-) create mode 100644 backends/qualcomm/runtime/backends/QnnDlcManager.h create mode 100644 backends/qualcomm/runtime/backends/irbackend/IrBackend.h create mode 100644 backends/qualcomm/runtime/backends/irbackend/IrContext.h create mode 100644 backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp create mode 100644 backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp create mode 100644 backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp create mode 100644 backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh index f256e8eec6d..8237b70d03d 100644 --- a/.ci/scripts/build-qnn-sdk.sh +++ b/.ci/scripts/build-qnn-sdk.sh @@ -33,6 +33,7 @@ set_up_aot() { cmake .. 
\ -DCMAKE_INSTALL_PREFIX=$PWD \ -DEXECUTORCH_BUILD_QNN=ON \ + -DANDROID_NATIVE_API_LEVEL=30 \ -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \ -DEXECUTORCH_BUILD_DEVTOOLS=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index f5adc84f903..1b7c8891a4e 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -70,6 +70,7 @@ endif() include_directories( BEFORE ${_common_include_directories} ${QNN_SDK_ROOT}/include/QNN + ${QNN_SDK_ROOT}/share/QNN/converter/jni ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include ${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10 ) @@ -117,6 +118,7 @@ add_library(qnn_backend STATIC) add_library(qnn_backend_cache STATIC) add_library(qnn_context STATIC) add_library(qnn_custom_protocol STATIC) +add_library(qnn_dlc_manager STATIC) add_library(qnn_device STATIC) add_library(qnn_executorch_backend SHARED) add_library(qnn_executorch_header INTERFACE) @@ -174,8 +176,11 @@ target_link_libraries( qnn_factory PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph qnn_mem_manager qnn_custom_protocol ) + +target_link_libraries(qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context qnn_graph qnn_mem_manager) + target_link_libraries( - qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer + qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer qnn_dlc_manager ) target_link_libraries( qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h index 9914b11676e..67abadd6731 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h @@ -195,7 +195,7 @@ class PyQnnManager { std::vector>& op_wrappers) { QnnExecuTorchContextBinary binary_info; - if (qnn_manager_->IsOnlinePrepare() || 
qnn_manager_->IsMultipleGraphs()) { + if (qnn_manager_->IsMultipleGraphs()) { builder_.Reset(); std::vector tensor_data; std::vector offsets; @@ -305,8 +305,11 @@ class PyQnnManager { QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph"); return py::array_t(0); } - if (qnn_manager_->GetContextBinary(binary_info) != - executorch::runtime::Error::Ok) { + auto qnn_executorch_options = GetQnnExecuTorchOptions( + qnn_executorch_option_ptr_.cast().data()); + if (qnn_executorch_options->saver() || + qnn_manager_->GetContextBinary(binary_info) != + executorch::runtime::Error::Ok) { return py::array_t(0); } } diff --git a/backends/qualcomm/builders/op_dequantize.py b/backends/qualcomm/builders/op_dequantize.py index 507ecc4e3e3..722cdbe7957 100644 --- a/backends/qualcomm/builders/op_dequantize.py +++ b/backends/qualcomm/builders/op_dequantize.py @@ -45,7 +45,7 @@ def define_node( dequant_output_tensors = [output_tensor_wrapper] dequant_op = PyQnnWrapper.PyQnnOpWrapper( - node.target.__name__, + node.name, QNN_OP_PACKAGE_NAME_QTI_AISW, OpDequantize.op_name, ) diff --git a/backends/qualcomm/builders/op_quantize.py b/backends/qualcomm/builders/op_quantize.py index 4921f96b467..e10f88795bb 100644 --- a/backends/qualcomm/builders/op_quantize.py +++ b/backends/qualcomm/builders/op_quantize.py @@ -52,7 +52,7 @@ def define_node( quant_output_tensors = [output_tensor_wrapper] quant_op = PyQnnWrapper.PyQnnOpWrapper( - node.target.__name__, + node.name, QNN_OP_PACKAGE_NAME_QTI_AISW, OpQuantize.op_name, ) diff --git a/backends/qualcomm/qnn_preprocess.py b/backends/qualcomm/qnn_preprocess.py index 4a11bf050a2..63c1795c117 100644 --- a/backends/qualcomm/qnn_preprocess.py +++ b/backends/qualcomm/qnn_preprocess.py @@ -15,6 +15,9 @@ from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option +from 
executorch.backends.qualcomm.serialization.qc_schema_serialize import ( + flatbuffer_to_option, +) from executorch.exir.backend.backend_details import ( BackendDetails, CompileSpec, @@ -92,6 +95,12 @@ def preprocess( qnn_manager.GetGraphNames()[0], [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrapper_list], ) + + obj_options = flatbuffer_to_option(option) + if obj_options.saver: + exit( + f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}" + ) assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary." qnn_manager.Destroy() # For now, debug_handle_map is not used by QNN ExecuTorch diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 32d82950908..ab038404582 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -36,7 +36,6 @@ Result QnnExecuTorchBackend::init( // covert SizedBuffer to qnn ExecuTorch option QnnExecuTorchContextBinary qnn_context_blob; const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr; - auto [status, signature, ctx_size, ctx_bin] = QnnContextCustomProtocol().DeserializeContextCustomBuffer( const_cast(processed->data())); @@ -74,7 +73,6 @@ Result QnnExecuTorchBackend::init( // NOTE: Since we use placement new and since this type is not trivially // destructible, we must call the destructor manually in destroy(). 
new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob); - // TODO: this is a temporal solution for multi-graph support, will be // removed once framework starts to accept runtime configuration // --- @@ -96,9 +94,9 @@ Result QnnExecuTorchBackend::init( if (qnn_manager->IsOnlinePrepare()) { ET_CHECK_OR_RETURN_ERROR( - qnn_manager->CompileQcir() == Error::Ok, + qnn_manager->CompileDlc() == Error::Ok, Internal, - "Fail to compile binary in qcir format"); + "Fail to compile binary in Dlc format"); } else { for (const std::string& graph_name : qnn_manager->GetGraphNames()) { ET_CHECK_OR_RETURN_ERROR( diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index 994cc1931cc..13718b0891a 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -37,9 +37,7 @@ bool CompareExportedInput( } QnnManager::~QnnManager() { - backend_params_ptr_.reset(new BackendConfigParameters()); - logger_.reset(); - qnn_loaded_backend_.TerminateAllBackends(); + Destroy(); } QnnManager::QnnManager( @@ -96,10 +94,14 @@ QnnManager::QnnManager( } qnn_loaded_backend_ = QnnImplementation(library_path); backend_params_ptr_ = std::make_unique(); + + qnn_dlc_manager_ = + std::make_shared(qnn_context_blob_, options_); } Error QnnManager::LoadQnnLibrary() { - Error ret = qnn_loaded_backend_.Load(nullptr); + auto config = GetImplementationConfig(); + Error ret = qnn_loaded_backend_.Load(config.get()); return ret; } @@ -286,7 +288,11 @@ Error QnnManager::Init() { "parameters for Qnn executorch backend type %d", options_->backend_options()->backend_type()); backend_params_ptr_ = QnnBackendFactory().Create( - qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_); + qnn_loaded_backend_, + logger_.get(), + qnn_context_blob_, + options_, + qnn_dlc_manager_.get()); ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_ != nullptr, Internal, @@ -326,6 +332,18 @@ Error QnnManager::Init() { Internal, "Fail to 
pre register custom memory handle"); #endif + + if (IsOnlinePrepare()) { + Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; + qnn_loaded_backend_.GetQnnInterface().qnn_backend_get_api_version( + &qnn_version); + + ET_CHECK_OR_RETURN_ERROR( + qnn_dlc_manager_->SetUpDlcEnvironment(qnn_version.coreApiVersion) == + Error::Ok, + Internal, + "Fail to setup Dlc environment"); + } return Error::Ok; } @@ -446,9 +464,11 @@ Error QnnManager::ProfileExecuteData( void QnnManager::Destroy() { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters"); backend_params_ptr_.reset(new BackendConfigParameters()); + qnn_dlc_manager_->ResetBackendParams(); logger_.reset(); - + qnn_dlc_manager_->ResetLogger(); qnn_loaded_backend_.TerminateAllBackends(); + qnn_dlc_manager_->TerminateAllBackends(); } bool QnnManager::IsNodeSupportedByBackend( @@ -483,11 +503,64 @@ bool QnnManager::IsNodeSupportedByBackend( Error QnnManager::GetContextBinary( QnnExecuTorchContextBinary& qnn_executorch_context_binary) { - ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_context_ptr_->GetContextBinary( - qnn_executorch_context_binary) == Error::Ok, - Internal, - "Fail to get context binary."); + if (IsOnlinePrepare() && + qnn_dlc_manager_->backend_params_ptr_->qnn_context_ptr_.get() != + nullptr) { + ET_CHECK_OR_RETURN_ERROR( + qnn_dlc_manager_->backend_params_ptr_->qnn_context_ptr_ + ->GetContextBinary(qnn_executorch_context_binary) == Error::Ok, + Internal, + "Fail to get context binary."); + } + + else { + ET_CHECK_OR_RETURN_ERROR( + backend_params_ptr_->qnn_context_ptr_->GetContextBinary( + qnn_executorch_context_binary) == Error::Ok, + Internal, + "Fail to get context binary."); + } + return Error::Ok; +} + +Error QnnManager::CompileDlc() { + Qnn_ErrorHandle_t error; + auto qnn_dlc_graph_info = qnn_dlc_manager_->GetQnnDlcGraphInfoPtr(); + uint32_t qnn_dlc_graph_info_num = qnn_dlc_manager_->GetQnnDlcGraphInfoNum(); + for (uint32_t i = 0; i < qnn_dlc_graph_info_num; ++i) { + auto& graphInfo = 
(*qnn_dlc_graph_info)[i]; + backend_params_ptr_->qnn_graph_ptr_->SetGraphHandle( + graphInfo.graphName, graphInfo.graph); + error = + backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graphInfo.graphName); + if (error != QNN_SUCCESS) { + QNN_EXECUTORCH_LOG_ERROR( + "Failed to finalize Qnn Graph with error: %d", + QNN_GET_ERROR_CODE(error)); + return Error::Internal; + } + + std::vector> graph_inputs, graph_outputs, + tensors; + + for (int i = 0; i < graphInfo.numInputTensors; ++i) { + auto tw = CreateTensorWrapper(graphInfo.inputTensors[i]); + tw->UpdateQnnTensorMeta(graphInfo.inputTensors[i]); + graph_inputs.push_back(tw); + } + for (int i = 0; i < graphInfo.numOutputTensors; ++i) { + auto tw = CreateTensorWrapper(graphInfo.outputTensors[i]); + tw->UpdateQnnTensorMeta(graphInfo.outputTensors[i]); + graph_outputs.push_back(tw); + } + + ET_CHECK_OR_RETURN_ERROR( + AllocateTensor(graphInfo.graphName, graph_inputs, graph_outputs) == + Error::Ok, + Internal, + "Fail to allocate tensor for Dlc with graph_name: %s", + graphInfo.graphName); + } return Error::Ok; } @@ -616,31 +689,34 @@ Error QnnManager::Compile( const std::string& graph_name, std::vector>& op_wrappers) { Qnn_ErrorHandle_t error = QNN_SUCCESS; + QnnGraph* qnn_graph_ptr = backend_params_ptr_->qnn_graph_ptr_.get(); + if (IsOnlinePrepare() && + qnn_dlc_manager_->backend_params_ptr_->qnn_graph_ptr_.get() != nullptr) { + qnn_graph_ptr = qnn_dlc_manager_->backend_params_ptr_->qnn_graph_ptr_.get(); + } for (std::shared_ptr& op_wrapper : op_wrappers) { for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) { ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph( - graph_name, tensor_wrapper) == Error::Ok, + qnn_graph_ptr->EnsureTensorInQnnGraph(graph_name, tensor_wrapper) == + Error::Ok, Internal, "Tensor name %s isn't added to Qnn Graph", tensor_wrapper->GetName().c_str()); } - for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) { 
ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph( - graph_name, tensor_wrapper) == Error::Ok, + qnn_graph_ptr->EnsureTensorInQnnGraph(graph_name, tensor_wrapper) == + Error::Ok, Internal, "Tensor name %s isn't added to Qnn Graph", tensor_wrapper->GetName().c_str()); } - for (const auto& param : op_wrapper->GetParams()) { auto* p_tensor_param = dynamic_cast(param.get()); if (p_tensor_param != nullptr) { ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph( + qnn_graph_ptr->EnsureTensorInQnnGraph( graph_name, p_tensor_param->GetTensorWrapper()) == Error::Ok, Internal, "Param tensor name %s isn't added to Qnn Graph", @@ -652,8 +728,7 @@ Error QnnManager::Compile( "Fail to configure Qnn backend"); } - error = backend_params_ptr_->qnn_graph_ptr_->GraphAddNode( - graph_name, op_wrapper->GetOpConfig()); + error = qnn_graph_ptr->GraphAddNode(graph_name, op_wrapper->GetOpConfig()); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( "Failed to add node to Qnn Graph with error: %d", @@ -661,14 +736,13 @@ Error QnnManager::Compile( return Error::Internal; } } - error = backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graph_name); + error = qnn_graph_ptr->GraphFinalize(graph_name); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( "Failed to finalize Qnn Graph with error: %d", QNN_GET_ERROR_CODE(error)); return Error::Internal; } - return Error::Ok; } diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h index 17294afbd88..ee9c4337532 100644 --- a/backends/qualcomm/runtime/QnnManager.h +++ b/backends/qualcomm/runtime/QnnManager.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,7 @@ class QnnManager { QnnExecuTorchContextBinary& qnn_executorch_context_binary); executorch::runtime::Error CompileQcir(); - + executorch::runtime::Error CompileDlc(); executorch::runtime::Error Compile( const std::string& graph_name, 
std::vector>& op_wrappers); @@ -110,6 +111,22 @@ class QnnManager { std::string GetBinarySignature(); private: + std::unique_ptr GetImplementationConfig() { + if (options_->saver()) { + auto outputDirCfg = std::make_unique(); + outputDirCfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY; + outputDirCfg->outputDirectory = options_->saver_output_dir()->c_str(); + + auto saverCfg = std::make_unique(2); + saverCfg[0] = outputDirCfg.release(); + saverCfg[1] = nullptr; + + return saverCfg; + } else { + return nullptr; + } + } + executorch::runtime::Error LoadQnnLibrary(); static constexpr const char* htp_library_name_ = "libQnnHtp.so"; @@ -147,6 +164,9 @@ class QnnManager { {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16, executorch::aten::ScalarType::UInt16}, }; + + // Manager for handling DLC (Deep Learning Container) + std::shared_ptr qnn_dlc_manager_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index 2df38086133..e4e0f6ada16 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -67,10 +67,12 @@ target_sources( qnn_context PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h + ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h ${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp ) # qnn_backend_cache @@ -99,6 +101,7 @@ target_sources( qnn_backend PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h + ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp ) @@ -122,3 +125,10 @@ target_sources( PUBLIC 
${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.cpp ) + +# qnn_dlc_manager +target_sources( + qnn_dlc_manager + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp +) diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp index 699e0646697..1e6b1262c3a 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp @@ -134,20 +134,16 @@ Error QnnBackendCache::Configure() { QnnQcirCustomProtocol().DeserializeQcirCustomBuffer( qnn_context_blob_.buffer); if (status == Error::Ok) { - // online prepare or first stage of multi graph - state_ = ONLINE_PREPARE; + // first stage of multi graph + state_ = MULTI_GRAPH; auto context = qcir::GetContext(qcir_fbs_ptr); for (const auto& graph : *context->graphs()) { graph_names_.emplace_back(graph->name()->str()); } return Error::Ok; } - - QNN_EXECUTORCH_LOG_ERROR( - "Failed to parse QNN Graph Info. The cache " - "might be broken. 
Please consider to re-generate the " - "cache."); - InvalidateCache(); + // online prepare + state_ = ONLINE_PREPARE; } return Error::Ok; } diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.h b/backends/qualcomm/runtime/backends/QnnBackendCache.h index b9e00f0a662..9abec186c3a 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.h @@ -24,6 +24,7 @@ class QnnBackendCache { SERIALIZE = 1, DESERIALIZE = 2, ONLINE_PREPARE = 3, + MULTI_GRAPH = 4, }; explicit QnnBackendCache( const QnnExecuTorchContextBinary& qnn_context_blob, @@ -55,6 +56,10 @@ class QnnBackendCache { return graph_names_; } + void SetGraphNames(const std::string& graph_name) { + graph_names_.emplace_back(graph_name); + } + executorch::runtime::Error Configure(); protected: diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp index 0df40ddb4e5..310e38d1744 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp @@ -57,8 +57,7 @@ Error QnnBackend::Configure() { return Error::Ok; } -Error QnnBackend::VerifyQNNSDKVersion( - const QnnExecuTorchBackendType backend_id) { +Error QnnBackend::VerifyQNNSDKVersion() { const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; @@ -73,8 +72,16 @@ Error QnnBackend::VerifyQNNSDKVersion( expected_version.coreApiVersion.major = QNN_API_VERSION_MAJOR; expected_version.coreApiVersion.minor = QNN_API_VERSION_MINOR; expected_version.coreApiVersion.patch = QNN_API_VERSION_PATCH; - expected_version.backendApiVersion = GetExpectedBackendVersion(); - const char* backend_type = EnumNameQnnExecuTorchBackendType(backend_id); + expected_version.backendApiVersion = QNN_VERSION_INIT; + if (qnn_interface.GetBackendId() == QNN_BACKEND_ID_SAVER) { + expected_version.backendApiVersion.major = 
QNN_SAVER_API_VERSION_MAJOR; + expected_version.backendApiVersion.minor = QNN_SAVER_API_VERSION_MINOR; + expected_version.backendApiVersion.patch = QNN_SAVER_API_VERSION_PATCH; + } else { + expected_version.backendApiVersion = GetExpectedBackendVersion(); + } + const char* backend_type = EnumNameQnnExecuTorchBackendType( + static_cast(qnn_interface.GetBackendId())); Error status = VersionChecker( qnn_version.coreApiVersion, expected_version.coreApiVersion, "Qnn API"); diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.h b/backends/qualcomm/runtime/backends/QnnBackendCommon.h index 56b5284c537..58bdee10846 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.h @@ -17,6 +17,8 @@ #include "QnnBackend.h" #include "QnnCommon.h" #include "QnnTypes.h" +#include "Saver/QnnSaverCommon.h" + namespace executorch { namespace backends { namespace qnn { @@ -45,8 +47,7 @@ class QnnBackend { return handle_; } - executorch::runtime::Error VerifyQNNSDKVersion( - const QnnExecuTorchBackendType backend_id); + executorch::runtime::Error VerifyQNNSDKVersion(); protected: virtual Qnn_Version_t GetExpectedBackendVersion() const = 0; diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index 29e6686740b..1f251aeaffa 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -7,6 +7,7 @@ */ #include #include +#include namespace executorch { namespace backends { namespace qnn { @@ -17,7 +18,8 @@ std::unique_ptr QnnBackendFactory::Create( const QnnImplementation& implementation, QnnLogger* logger, const QnnExecuTorchContextBinary& qnn_context_blob, - const QnnExecuTorchOptions* options) { + const QnnExecuTorchOptions* options, + QnnDlcManager* qnn_dlc_manager) { auto backend_params = std::make_unique(); switch (options->backend_options()->backend_type()) { 
@@ -68,7 +70,8 @@ std::unique_ptr QnnBackendFactory::Create( backend_params->qnn_backend_ptr_.get(), backend_params->qnn_device_ptr_.get(), backend_params->qnn_backend_cache_ptr_.get(), - htp_options); + htp_options, + qnn_dlc_manager); backend_params->qnn_graph_ptr_ = std::make_unique( implementation, @@ -88,8 +91,7 @@ std::unique_ptr QnnBackendFactory::Create( return nullptr; } - if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion( - options->backend_options()->backend_type()) == Error::Ok) { + if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) { return backend_params; } diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index 012c2cc7b5b..3d78a36b9f0 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -27,6 +27,8 @@ namespace executorch { namespace backends { namespace qnn { + +class QnnDlcManager; typedef enum { UNINITIALIZED, INITIALIZED } BackendInitializeState; // @brief Struct containing all handles for a given QNN backend @@ -67,7 +69,8 @@ class QnnBackendFactory { const QnnImplementation& implementation, QnnLogger* logger, const QnnExecuTorchContextBinary& qnn_context_blob, - const QnnExecuTorchOptions* options); + const QnnExecuTorchOptions* options, + QnnDlcManager* qnn_dlc_manager); }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp index 7c66e5ad19a..ee49b10215a 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp @@ -7,12 +7,12 @@ */ #include +#include + namespace executorch { namespace backends { namespace qnn { -using executorch::runtime::Error; - QnnContext::~QnnContext() { const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); Qnn_ErrorHandle_t error = 
QNN_SUCCESS; @@ -63,13 +63,13 @@ Error QnnContext::Configure() { } } else if ( cache_->GetCacheState() == QnnBackendCache::SERIALIZE || - cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) { + cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE || + cache_->GetCacheState() == QnnBackendCache::MULTI_GRAPH) { error = qnn_interface.qnn_context_create( backend_->GetHandle(), device_->GetHandle(), temp_context_config.empty() ? nullptr : temp_context_config.data(), &handle_); - if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( "Failed to create QNN context for Backend " @@ -82,7 +82,15 @@ Error QnnContext::Configure() { QNN_EXECUTORCH_LOG_ERROR("QNN context cache is invalid."); return Error::Internal; } - return AfterConfigure(); + if (AfterConfigure() != Error::Ok) { + return Error::Internal; + } + if (cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) { + // Register graphs from DLC during online prepare for HTP/GPU/DSP backends + return qnn_dlc_manager_->RegisterGraphsFromDLC( + implementation_, backend_, this, cache_); + } + return Error::Ok; } Error QnnContext::GetContextBinary( diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.h b/backends/qualcomm/runtime/backends/QnnContextCommon.h index 62a0b953eec..0e9e12ef544 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.h +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.h @@ -14,23 +14,30 @@ #include #include + namespace executorch { namespace backends { namespace qnn { + +class QnnDlcManager; + class QnnContext { public: explicit QnnContext( const QnnImplementation& implementation, QnnBackend* backend, QnnDevice* device, - QnnBackendCache* cache) + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager) : handle_(nullptr), implementation_(implementation), backend_(backend), device_(device), - cache_(cache) {} + cache_(cache), + qnn_dlc_manager_(qnn_dlc_manager) {} virtual ~QnnContext(); + executorch::runtime::Error Configure(); Qnn_ContextHandle_t 
GetHandle() const { @@ -53,7 +60,7 @@ class QnnContext { return cache_->GetCacheState(); }; - executorch::runtime::Error GetContextBinary( + virtual executorch::runtime::Error GetContextBinary( QnnExecuTorchContextBinary& qnn_executorch_context_binary); protected: @@ -72,6 +79,7 @@ class QnnContext { QnnDevice* device_; QnnBackendCache* cache_; QnnContextCustomProtocol qnn_context_custom_protocol_; + QnnDlcManager* qnn_dlc_manager_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h new file mode 100644 index 00000000000..a57906df4e3 --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#pragma once +#include + +#include +#include +#include + +#include "QnnWrapperUtils.hpp" +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; +using QnnModel_composeGraphsFromDlc = qnn_wrapper_api::ModelError_t (*)(...); +class QnnDlcManager { + public: + QnnDlcManager( + const QnnExecuTorchContextBinary& qnn_context_blob, + const QnnExecuTorchOptions* options); + + qnn_wrapper_api::GraphInfoPtr_t* GetQnnDlcGraphInfoPtr() { + return qnn_dlc_graph_info_; + } + + uint32_t GetQnnDlcGraphInfoNum() { + return qnn_dlc_graph_info_num_; + } + + std::unique_ptr backend_params_ptr_ = + std::make_unique(); + + void ResetBackendParams(); + void ResetLogger(); + void TerminateAllBackends(); + + Error SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion); + + Error RegisterGraphsFromDLC( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnContext* context, + QnnBackendCache* cache); + + private: + static constexpr const char* library_name_ = "libQnnIr.so"; + 
QnnImplementation qnn_loaded_backend_; + std::unique_ptr logger_; + + const QnnExecuTorchContextBinary& qnn_context_blob_; + const QnnExecuTorchOptions* options_; + + static constexpr const char* dlc_lib_ = "libQnnModelDlc.so"; + qnn_wrapper_api::GraphInfoPtr_t* qnn_dlc_graph_info_ = nullptr; + uint32_t qnn_dlc_graph_info_num_ = 0; + + Error LoadQnnIrLibrary(); + + Error Create(); + + Error Configure(); +}; +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h index 86de76f0d99..12a6be36b64 100644 --- a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h @@ -101,6 +101,9 @@ class QnnInterface { bool IsLoaded() const { return qnn_interface_ != nullptr; } + const QNN_INTERFACE_VER_TYPE& GetInterfaceVer() const { + return qnn_interface_->QNN_INTERFACE_VER_NAME; + } private: // --------- QnnInterface --------- diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp index b4614ac2d3d..9fe81f4cf54 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp @@ -16,7 +16,6 @@ Error QnnGraph::Configure(const std::string& graph_name) { // create qnn backend const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; - std::vector temp_graph_config; ET_CHECK_OR_RETURN_ERROR( MakeConfig(temp_graph_config) == Error::Ok, @@ -44,8 +43,8 @@ Error QnnGraph::Configure(const std::string& graph_name) { } } else if ( context_->GetCacheState() == QnnBackendCache::SERIALIZE || - context_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) { - Qnn_ErrorHandle_t error = qnn_interface.qnn_graph_create( + context_->GetCacheState() == QnnBackendCache::MULTI_GRAPH) { + error = 
qnn_interface.qnn_graph_create( context_->GetHandle(), graph_name.c_str(), temp_graph_config.empty() ? nullptr : temp_graph_config.data(), @@ -56,6 +55,9 @@ Error QnnGraph::Configure(const std::string& graph_name) { "qnn_graph_create failed. Error %d", QNN_GET_ERROR_CODE(error)); return Error::Internal; } + } else if (context_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) { + QNN_EXECUTORCH_LOG_INFO( + "Skip qnn_graph_create, graph has already been composed from Dlc."); } else { QNN_EXECUTORCH_LOG_ERROR("QNN context cache is invalid."); return Error::Internal; diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.h b/backends/qualcomm/runtime/backends/QnnGraphCommon.h index 62d9b1b9e1a..33f903dae41 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.h +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.h @@ -66,6 +66,12 @@ class QnnGraph { return handle_[graph_name]; } + void SetGraphHandle( + const std::string& graph_name, + Qnn_GraphHandle_t graph_handle) { + handle_[graph_name] = graph_handle; + } + QnnProfile* GetProfile(const std::string& graph_name) { return profile_[graph_name].get(); } diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 6baf4cbb411..7add5e744f9 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -5,7 +5,6 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include #include #include "QnnInterface.h" @@ -15,11 +14,6 @@ namespace qnn { using executorch::runtime::Error; -template -Fn loadQnnFunction(void* handle, const char* function_name) { - return reinterpret_cast(dlsym(handle, function_name)); // NOLINT -} - Error QnnImplementation::InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config) { diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.h b/backends/qualcomm/runtime/backends/QnnImplementation.h index 61ee6222e91..a49ee6516fc 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.h +++ b/backends/qualcomm/runtime/backends/QnnImplementation.h @@ -10,12 +10,19 @@ #include #include +#include #include #include #include namespace executorch { namespace backends { namespace qnn { + +template +Fn loadQnnFunction(void* handle, const char* function_name) { + return reinterpret_cast(dlsym(handle, function_name)); // NOLINT +} + class QnnImplementation { public: using BackendIdType = decltype(QnnInterface_t{}.backendId); diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp index 28d0bbf8055..50d299b55e9 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp @@ -10,7 +10,6 @@ #include #include "HTP/QnnHtpCommon.h" -#include "Saver/QnnSaverCommon.h" namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h index 131cf856ce1..88660db080a 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h @@ -16,6 +16,7 @@ namespace executorch { namespace backends { namespace qnn { +class QnnDlcManager; class HtpContext : public QnnContext { public: HtpContext( @@ -23,8 +24,9 @@ class HtpContext : public QnnContext { 
QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, - const QnnExecuTorchHtpBackendOptions* htp_options) - : QnnContext(implementation, backend, device, cache) { + const QnnExecuTorchHtpBackendOptions* htp_options, + QnnDlcManager* qnn_dlc_manager) + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { htp_context_custom_config_ = std::make_unique(this, htp_options); }
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h
new file mode 100644
index 00000000000..ddeb3a24460
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+
+#include
+// IR/QnnIrCommon.h is only pulled in for QNN API 2.23 and later. The major
+// version is compared first so a later major release is not excluded by its
+// minor number.
+#if (QNN_API_VERSION_MAJOR > 2) || \
+    (QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 23)
+#include "IR/QnnIrCommon.h"
+#endif
+#include "QnnTypes.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+// QnnBackend specialization for the QNN IR backend.
+class IrBackend : public QnnBackend {
+ public:
+  IrBackend(const QnnImplementation& implementation, QnnLogger* logger)
+      : QnnBackend(implementation, logger) {}
+  ~IrBackend() {}
+
+  // Returns the IR API version this build was compiled against, or
+  // QNN_VERSION_INIT when the IR headers are unavailable (API below 2.23).
+  Qnn_Version_t GetExpectedBackendVersion() const override {
+    Qnn_Version_t backend_version;
+#if (QNN_API_VERSION_MAJOR > 2) || \
+    (QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 23)
+    backend_version.major = QNN_IR_API_VERSION_MAJOR;
+    backend_version.minor = QNN_IR_API_VERSION_MINOR;
+    backend_version.patch = QNN_IR_API_VERSION_PATCH;
+#else
+    backend_version = QNN_VERSION_INIT;
+#endif
+    return backend_version;
+  }
+};
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrContext.h b/backends/qualcomm/runtime/backends/irbackend/IrContext.h new file mode 100644 index 00000000000..824d41bb7ff --- /dev/null +++
b/backends/qualcomm/runtime/backends/irbackend/IrContext.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { +class IrContext : public QnnContext { + public: + using QnnContext::QnnContext; + + executorch::runtime::Error GetContextBinary( + QnnExecuTorchContextBinary& qnn_executorch_context_binary) override; + + private: + std::vector buffer_; +}; +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp new file mode 100644 index 00000000000..44ce8de8f46 --- /dev/null +++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +Error IrContext::GetContextBinary( + QnnExecuTorchContextBinary& qnn_executorch_context_binary) { + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
new file mode 100644
index 00000000000..57d0b9170bc
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+QnnDlcManager::QnnDlcManager(
+    const QnnExecuTorchContextBinary& qnn_context_blob,
+    const QnnExecuTorchOptions* options)
+    : qnn_loaded_backend_(""),
+      qnn_context_blob_(qnn_context_blob),
+      options_(options) {
+  QNN_EXECUTORCH_LOG_INFO(
+      "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes);
+
+  if (options_ == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Fail to create QnnDlcManager, options is nullptr");
+  }
+}
+
+Error QnnDlcManager::LoadQnnIrLibrary() {
+  return Error::Ok;
+}
+
+Error QnnDlcManager::Create() {
+  return Error::Ok;
+}
+
+Error QnnDlcManager::Configure() {
+  return Error::Ok;
+}
+
+Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) {
+  return Error::Ok;
+}
+
+/**
+ * Compose the graphs stored in the cached Dlc blob into `context`.
+ *
+ * The blob held by `cache` is staged in an anonymous memfd so that
+ * QnnModel_composeGraphsFromDlc (resolved from dlc_lib_) can open it through
+ * /proc/self/fd/<fd>. The composed graph info is kept in qnn_dlc_graph_info_
+ * and every graph name is registered with `cache`.
+ *
+ * @return Error::Ok on success, Error::Internal when the library, the symbol,
+ * the staging file or the graph composition fails.
+ */
+Error QnnDlcManager::RegisterGraphsFromDLC(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnContext* context,
+    QnnBackendCache* cache) {
+  void* lib_handle = dlopen(dlc_lib_, RTLD_NOW | RTLD_LOCAL);
+  if (lib_handle == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Cannot Open lib %s, with error: %s", dlc_lib_, dlerror());
+    return Error::Internal;
+  }
+  // Close the library on every exit path, including the early error returns
+  // below.
+  std::unique_ptr<void, int (*)(void*)> lib_guard(lib_handle, &dlclose);
+
+  QnnModel_composeGraphsFromDlc composeGraphsFromDlc =
+      loadQnnFunction(
+          lib_handle, "QnnModel_composeGraphsFromDlc");
+  if (composeGraphsFromDlc == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Cannot load symbol "
+        "QnnModel_composeGraphsFromDlc : %s",
+        dlerror());
+    return Error::Internal;
+  }
+
+  const QnnExecuTorchContextBinary& qnn_context_blob =
+      cache->GetQnnContextBlob();
+
+  // memfd_create on android api level 30 and above; on any other build fd
+  // stays -1 and the check below reports the failure.
+  int fd = -1;
+#ifdef __ANDROID__
+#if __ANDROID_API__ >= 30
+  fd = memfd_create("tmp.dlc", 0);
+#endif
+#endif
+  if (fd == -1) {
+    QNN_EXECUTORCH_LOG_ERROR("memfd_create fail");
+    return Error::Internal;
+  }
+
+  if (ftruncate(fd, qnn_context_blob.nbytes) == -1) {
+    QNN_EXECUTORCH_LOG_ERROR("ftruncate fail");
+    close(fd);
+    return Error::Internal;
+  }
+
+  void* addr = mmap(
+      nullptr,
+      qnn_context_blob.nbytes,
+      PROT_READ | PROT_WRITE,
+      MAP_SHARED,
+      fd,
+      0);
+  if (addr == MAP_FAILED) {
+    QNN_EXECUTORCH_LOG_ERROR("mmap");
+    close(fd);
+    return Error::Internal;
+  }
+
+  memcpy(addr, qnn_context_blob.buffer, qnn_context_blob.nbytes);
+
+  char dlc_path[256];
+  snprintf(dlc_path, sizeof(dlc_path), "/proc/self/fd/%d", fd);
+
+  const QNN_INTERFACE_VER_TYPE& interfaceVer =
+      implementation.GetQnnInterface().GetInterfaceVer();
+
+  const auto compose_status = composeGraphsFromDlc(
+      /*backendHandle=*/backend->GetHandle(),
+      /*interface=*/interfaceVer,
+      /*contextHandle=*/context->GetHandle(),
+      /*graphsConfigInfo=*/nullptr,
+      /*dlcPath=*/dlc_path,
+      /*numGraphsConfigInfo=*/0,
+      /*graphsInfo=*/&qnn_dlc_graph_info_,
+      /*numGraphsInfo=*/&qnn_dlc_graph_info_num_,
+      /*debug=*/false,
+      /*logCallback=*/nullptr,
+      /*maxLogLevel=*/QNN_LOG_LEVEL_VERBOSE);
+
+  // The staging mapping and descriptor are only needed while composing;
+  // release them whether or not composition succeeded.
+  munmap(addr, qnn_context_blob.nbytes);
+  close(fd);
+
+  if (compose_status != qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR) {
+    QNN_EXECUTORCH_LOG_ERROR("Failed to open Dlc");
+    return Error::Internal;
+  }
+
+  for (uint32_t i = 0; i < qnn_dlc_graph_info_num_; ++i) {
+    auto& graphInfo = (*qnn_dlc_graph_info_)[i];
+    cache->SetGraphNames(graphInfo.graphName);
+  }
+
+  return Error::Ok;
+}
+
+void QnnDlcManager::ResetBackendParams() {}
+void QnnDlcManager::ResetLogger() {}
+void QnnDlcManager::TerminateAllBackends() {}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp new file mode 100644 index 00000000000..f167aae9319 --- /dev/null +++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +Error IrContext::GetContextBinary( + QnnExecuTorchContextBinary& qnn_executorch_context_binary) { + // read Dlc and write to buffer + std::string dlc_name = GetGraphNames()[0] + ".dlc"; + std::ifstream dlc_file(dlc_name, std::ios::binary | std::ios::ate); + if (dlc_file.is_open()) { + std::streamsize size = dlc_file.tellg(); + dlc_file.seekg(0, std::ios::beg); + + buffer_ = std::vector(size); + dlc_file.read(buffer_.data(), size); + dlc_file.close(); + qnn_executorch_context_binary.buffer = buffer_.data(); + qnn_executorch_context_binary.nbytes = size; + return Error::Ok; + } else { + QNN_EXECUTORCH_LOG_ERROR( + "Unable to open dlc file %s for building QnnExecuTorchContextBinary", + dlc_name.c_str()); + } + return Error::Internal; +} +} // namespace qnn +} // namespace backends +} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
new file mode 100644
index 00000000000..14b9aeadf3a
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include
+#include
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+QnnDlcManager::QnnDlcManager(
+    const QnnExecuTorchContextBinary& qnn_context_blob,
+    const QnnExecuTorchOptions* options)
+    : qnn_loaded_backend_(""),
+      qnn_context_blob_(qnn_context_blob),
+      options_(options) {
+  QNN_EXECUTORCH_LOG_INFO(
+      "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes);
+
+  if (options_ == nullptr) {
+    QNN_EXECUTORCH_LOG_ERROR(
+        "Fail to create QnnDlcManager, options is nullptr");
+  }
+}
+
+Error QnnDlcManager::LoadQnnIrLibrary() {
+  qnn_loaded_backend_ = QnnImplementation(library_name_);
+  Error ret = qnn_loaded_backend_.Load(nullptr);
+  return ret;
+}
+
+Error QnnDlcManager::Create() {
+  // Every statement below dereferences backend_params_ptr_, so reject a null
+  // pointer before touching it.
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.");
+
+  backend_params_ptr_->qnn_backend_ptr_ =
+      std::make_unique(qnn_loaded_backend_, logger_.get());
+
+  backend_params_ptr_->qnn_device_ptr_ =
+      std::make_unique(qnn_loaded_backend_, logger_.get());
+
+  backend_params_ptr_->qnn_backend_cache_ptr_ =
+      std::make_unique(
+          qnn_context_blob_, options_->graph_name()->str());
+
+  backend_params_ptr_->qnn_context_ptr_ = std::make_unique(
+      qnn_loaded_backend_,
+      backend_params_ptr_->qnn_backend_ptr_.get(),
+      backend_params_ptr_->qnn_device_ptr_.get(),
+      backend_params_ptr_->qnn_backend_cache_ptr_.get(),
+      nullptr);
+
+  backend_params_ptr_->qnn_graph_ptr_ = std::make_unique(
+      qnn_loaded_backend_,
+      backend_params_ptr_->qnn_backend_ptr_.get(),
+      backend_params_ptr_->qnn_context_ptr_.get(),
+      options_->profile_level());
+  backend_params_ptr_->backend_init_state_ =
+      BackendInitializeState::INITIALIZED;
+  return backend_params_ptr_->qnn_backend_ptr_->VerifyQNNSDKVersion();
+}
+
+Error QnnDlcManager::Configure() {
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.");
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn backend cache");
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn backend");
+  ET_CHECK_OR_RETURN_ERROR(
+      backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok,
+      Internal,
+      "Fail to configure Qnn context");
+  for (const std::string& graph_name :
+       backend_params_ptr_->qnn_context_ptr_->GetGraphNames()) {
+    ET_CHECK_OR_RETURN_ERROR(
+        backend_params_ptr_->qnn_graph_ptr_->Configure(graph_name) == Error::Ok,
+        Internal,
+        "Fail to configure Qnn graph");
+  }
+  backend_params_ptr_->backend_init_state_ =
+      BackendInitializeState::INITIALIZED;
+
+  return Error::Ok;
+}
+
+/**
+ * Prepare the IR backend environment: check the core API version, load the
+ * IR library, create the logger, then create and configure the backend
+ * objects.
+ *
+ * @param coreApiVersion Core API version; must be at least 2.23.0.
+ * @return Error::Ok on success, Error::Internal if any step fails.
+ */
+Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) {
+  // Compare the major version first: requiring `minor >= 23` on its own would
+  // reject any later major release with a small minor number (e.g. 3.0.0).
+  ET_CHECK_MSG(
+      (coreApiVersion.major > 2 ||
+       (coreApiVersion.major == 2 && coreApiVersion.minor >= 23)),
+      "Qnn API version %u.%u.%u is not supported for Qnn IR backend, The minimum supported version is 2.23.0 or QNN_SDK version 2.30.0",
+      coreApiVersion.major,
+      coreApiVersion.minor,
+      coreApiVersion.patch);
+
+  // The constructor only logs a null options_; stop here before it is
+  // dereferenced below and in Create().
+  ET_CHECK_OR_RETURN_ERROR(
+      options_ != nullptr, Internal, "QnnDlcManager options is nullptr.");
+
+  ET_CHECK_OR_RETURN_ERROR(
+      LoadQnnIrLibrary() == Error::Ok,
+      Internal,
+      "Fail to Load Qnn IR library.");
+
+  logger_ = std::make_unique(
+      qnn_loaded_backend_, LoggingCallback, options_->log_level());
+
+  ET_CHECK_OR_RETURN_ERROR(
+      Create() == Error::Ok, Internal, "Failed to load Qnn IR backend.");
+
+  ET_CHECK_OR_RETURN_ERROR(
+      Configure() == Error::Ok, Internal, "Fail to configure IR backend.");
+
+  return Error::Ok;
+}
+
+Error QnnDlcManager::RegisterGraphsFromDLC(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnContext* context,
+    QnnBackendCache* cache) {
+  return Error::Ok;
+}
+
+void QnnDlcManager::ResetBackendParams() {
+  backend_params_ptr_.reset(new BackendConfigParameters());
+}
+
+void QnnDlcManager::ResetLogger() {
+  logger_.reset();
+}
+
+void QnnDlcManager::TerminateAllBackends() {
+  qnn_loaded_backend_.TerminateAllBackends();
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/scripts/build.sh
b/backends/qualcomm/scripts/build.sh index fef177fd300..c079dd41a2a 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -86,8 +86,8 @@ if [ "$BUILD_AARCH64" = true ]; then -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DANDROID_PLATFORM=android-30 \ -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ -B$BUILD_ROOT @@ -100,7 +100,7 @@ if [ "$BUILD_AARCH64" = true ]; then -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ + -DANDROID_PLATFORM=android-30 \ -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 3e78ba23ac8..d8809231a9f 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -194,6 +194,12 @@ table QnnExecuTorchOptions { /// True if there exists multiple graphs in one .pte file. 
multiple_graphs:bool; + + // Enable this option to record all QNN API calls for debugging purpose + saver:bool; + + // Path to saver output folder + saver_output_dir:string; } root_type QnnExecuTorchOptions; diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index 56ba27bb000..93305b1dbb5 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -157,3 +157,5 @@ class QnnExecuTorchOptions: shared_buffer: bool = False is_from_context_binary: bool = False multiple_graphs: bool = False + saver: bool = False + saver_output_dir: str = "saver_output" diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 7d097fd45bf..338209fcd4a 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -25,10 +25,10 @@ from executorch.backends.qualcomm.tests.utils import ( generate_context_binary, ModuleQConfig, + QnnTool, QuantDtype, TestQNN, validate_context_binary, - validate_qcir, ) from executorch.backends.qualcomm.utils.constants import ( QCOM_ANNOTATION, @@ -2563,15 +2563,30 @@ def test_qnn_backend_context_extraction(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) backend_options = generate_htp_compiler_spec(use_fp16=True) + + # Validate dlc + compiler_spec = generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ) + with tempfile.TemporaryDirectory() as tmp_dir: + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + dump_context_from_pte(pte_path) + + qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) + qnn_tool.qnn_context_binary_generator() + 
qnn_tool.qnn_net_run() + compiler_specs = [ self.compiler_specs, - generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ), ] - validators = [validate_context_binary, validate_qcir] + validators = [validate_context_binary] for compiler_spec, validate in zip(compiler_specs, validators): edge_prog_mgr = to_edge_transform_and_lower_to_qnn( @@ -2591,15 +2606,30 @@ def test_qnn_backend_dump_context_from_pte(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) backend_options = generate_htp_compiler_spec(use_fp16=True) + + # Validate dlc + compiler_spec = generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ) + with tempfile.TemporaryDirectory() as tmp_dir: + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + dump_context_from_pte(pte_path) + + qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) + qnn_tool.qnn_context_binary_generator() + qnn_tool.qnn_net_run() + compiler_specs = [ self.compiler_specs, - generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ), ] - validators = [validate_context_binary, validate_qcir] + validators = [validate_context_binary] for compiler_spec, validate in zip(compiler_specs, validators): edge_prog_mgr = to_edge_transform_and_lower_to_qnn( @@ -3181,15 +3211,30 @@ def test_qnn_backend_context_extraction(self): sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) module = self.get_qdq_module(module, sample_input) backend_options = generate_htp_compiler_spec(use_fp16=False) + + # Validate dlc + compiler_spec = 
generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ) + with tempfile.TemporaryDirectory() as tmp_dir: + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + dump_context_from_pte(pte_path) + + qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) + qnn_tool.qnn_context_binary_generator() + qnn_tool.qnn_net_run() + compiler_specs = [ self.compiler_specs, - generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ), ] - validators = [validate_context_binary, validate_qcir] + validators = [validate_context_binary] for compiler_spec, validate in zip(compiler_specs, validators): edge_prog_mgr = to_edge_transform_and_lower_to_qnn( @@ -3210,15 +3255,30 @@ def test_qnn_backend_dump_context_from_pte(self): sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) module = self.get_qdq_module(module, sample_input) backend_options = generate_htp_compiler_spec(use_fp16=True) + + # Validate dlc + compiler_spec = generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ) + with tempfile.TemporaryDirectory() as tmp_dir: + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + dump_context_from_pte(pte_path) + + qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) + qnn_tool.qnn_context_binary_generator() + qnn_tool.qnn_net_run() + compiler_specs = [ self.compiler_specs, - generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - 
backend_options=backend_options, - online_prepare=True, - ), ] - validators = [validate_context_binary, validate_qcir] + validators = [validate_context_binary] for compiler_spec, validate in zip(compiler_specs, validators): edge_prog_mgr = to_edge_transform_and_lower_to_qnn( @@ -4679,12 +4739,6 @@ def setup_environment(): help="Input the model to export", type=str, ) - parser.add_argument( - "-o", - "--online_prepare", - help="Conduct on-device graph compilation", - action="store_true", - ) parser.add_argument( "-P", "--enable_profile", diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 71d3b9e7ec2..695c846de05 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import collections import copy +import json import os import subprocess import tempfile @@ -13,7 +14,6 @@ import numpy as np import torch - from executorch import exir from executorch.backends.qualcomm.qnn_preprocess import QnnBackend from executorch.backends.qualcomm.quantizer.quantizer import ModuleQConfig, QuantDtype @@ -636,3 +636,111 @@ def call(self, graph_module: torch.fx.GraphModule): QCOM_PASS_ACTIVATE_KEY: True, QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY: {"division": division}, } + + +class QnnTool(TestQNN): + def __init__( + self, + tmp_dir, + pte_fname, + sample_input, + workspace="/data/local/tmp/qnn_executorch_test", + ): + self.qnn_sdk = os.environ.get("QNN_SDK_ROOT", None) + self.ndk = os.environ.get("ANDROID_NDK_ROOT", None) + assert self.qnn_sdk, "QNN_SDK_ROOT was not found in environment variable" + assert self.ndk, "ANDROID_NDK_ROOT was not found in environment" + + self.tmp_dir = tmp_dir + self.workspace = workspace + self.adb = SimpleADB( + qnn_sdk=self.qnn_sdk, + build_path=self.build_folder, + pte_path=pte_fname, + workspace=self.workspace, + device_id=self.device, + host_id=self.host, + soc_model=self.model, + error_only=self.error_only, + ) + 
self.sample_input = sample_input + + def qnn_context_binary_generator( + self, dlc_name="forward_0.dlc", binary_name="forward.serialized" + ): + cmds = [ + f"{self.qnn_sdk}/bin/x86_64-linux-clang/qnn-context-binary-generator", + "--backend", + f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnHtp.so", + "--model", + f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnModelDlc.so", + "--dlc_path", + f"{self.tmp_dir}/{dlc_name}", + "--binary_file", + f"{self.tmp_dir}/{binary_name}", + ] + result = subprocess.run( + " ".join(cmds), + shell=True, + executable="/bin/bash", + capture_output=True, + ) + assert os.path.isfile(f"{self.tmp_dir}/{binary_name}.bin"), print(result.stderr) + + def qnn_net_run(self, binary_name="forward.serialized"): + input_list = "" + for idx, _ in enumerate(self.sample_input): + input_name = f"input_{idx}_0.raw" + input_list += input_name + " " + input_list = input_list.strip() + "\n" + if self.enable_x86_64: + # TODO: Implement context binary consumption on x86_64 platform + return + + else: + # Config for qnn-net-run + config = { + "backend_extension_config": { + "backend_extensions": { + "shared_library_path": "./libQnnHtpNetRunExtensions.so", + "config_file_path": "config.json", + } + }, + "config": { + "devices": [ + { + "profiling_level": "linting", + "cores": [ + {"perf_profile": "burst", "rpc_control_latency": 100} + ], + } + ] + }, + } + + for file_name, data in config.items(): + with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file: + json.dump(data, json_file, indent=4) + + files = [ + f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpNetRunExtensions.so", + f"{self.tmp_dir}/backend_extension_config.json", + f"{self.tmp_dir}/config.json", + f"{self.tmp_dir}/{binary_name}.bin", + f"{self.qnn_sdk}/bin/aarch64-android/qnn-net-run", + ] + cmds = [ + f"export LD_LIBRARY_PATH={self.workspace} &&", + f"export ADSP_LIBRARY_PATH={self.workspace} &&", + f"cd {self.workspace} &&", + "./qnn-net-run", + "--backend libQnnHtp.so", + "--input_list 
input_list.txt", + f"--retrieve_context {binary_name}.bin", + "--use_native_input_files", + "--use_native_output_files", + "--config_file backend_extension_config.json", + "--profiling_level backend", + ] + self.adb.push(inputs=self.sample_input, input_list=input_list, files=files) + self.adb.execute(custom_runner_cmd=" ".join(cmds)) diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index e0ebc5beebe..3653cd3176f 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -221,7 +221,13 @@ def dump_context_from_pte(pte_path): delegate.processed.index ].data binary = qnn_mgr.StripProtocol(processed_bytes) - with open(f"{ctx_path}/{execution_plan.name}_{i}.bin", "wb") as f: + file_extension = ".bin" + if len(binary) == 0: + binary = processed_bytes + file_extension = ".dlc" + with open( + f"{ctx_path}/{execution_plan.name}_{i}{file_extension}", "wb" + ) as f: f.write(binary) @@ -1181,6 +1187,8 @@ def generate_qnn_executorch_compiler_spec( if saver: qnn_executorch_options.library_path = "libQnnSaver.so" + qnn_executorch_options.saver = True + qnn_executorch_options.saver_output_dir = "saver_output" if optrace: qnn_executorch_options.profile_level = QnnExecuTorchProfileLevel.kProfileOptrace diff --git a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md index 422ad26c49c..3723fc9b454 100644 --- a/docs/source/backends-qualcomm.md +++ b/docs/source/backends-qualcomm.md @@ -182,7 +182,7 @@ cmake .. \ -DPYTHON_EXECUTABLE=python3 \ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 + -DANDROID_PLATFORM=android-30 # nproc is used to detect the number of available CPU. # If it is not applicable, please feel free to use the number you want. 
@@ -191,7 +191,7 @@ cmake --build $PWD --target install -j$(nproc) cmake ../examples/qualcomm \ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ + -DANDROID_PLATFORM=android-30 \ -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \ -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ -DPYTHON_EXECUTABLE=python3 \ diff --git a/examples/qualcomm/test_qualcomm.sh b/examples/qualcomm/test_qualcomm.sh index d95f53f99cb..19d3d798418 100644 --- a/examples/qualcomm/test_qualcomm.sh +++ b/examples/qualcomm/test_qualcomm.sh @@ -38,7 +38,7 @@ test_cmake_qualcomm() { -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ + -DANDROID_NATIVE_API_LEVEL=30 \ -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 242170712e1..542739a2898 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -129,6 +129,7 @@ def push(self, inputs=None, input_list=None, files=None): f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so", f"{self.build_path}/{self.runner}", f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so", + f"{self.qnn_sdk}/lib/aarch64-android/libQnnModelDlc.so", ] input_list_file, input_files = generate_inputs( self.working_dir, self.input_list_filename, inputs, input_list @@ -294,6 +295,7 @@ def build_executorch_binary( dump_intermediate_outputs=False, passes_job=None, qat_training_data=None, + online_prepare=False, ): """ A function to generate an ExecuTorch binary for Qualcomm platforms. @@ -311,7 +313,9 @@ def build_executorch_binary( shared_buffer (bool, optional): Applies zero-copy mechanism to optimize runtime memory allocation. 
metadata (dict, optional): An optional dictionary that maps each method name to a constant value in eager mode. dump_intermediate_outputs (bool, optional): Enables dumping model intermediate outputs. - custom_pass_config (frozenset, optional): Set of custom passes for model processing. + passes_job (OrderedDict, optional): Custom passes job in capture_program, users can enable/disable specific passes or modify their attributes. + qat_training_data (List[torch.Tensor], optional): A dataset for quantization aware training(QAT). Typically is a pair of tensors, such as [features, ground truth]. + online_prepare (bool, optional): Compose QNN graph on device if set to True. Returns: None: The function writes the output to a specified .pte file. @@ -322,6 +326,7 @@ def build_executorch_binary( compile_spec = generate_qnn_executorch_compiler_spec( soc_model=getattr(QcomChipset, soc_model), backend_options=backend_options, + online_prepare=online_prepare, shared_buffer=shared_buffer, dump_intermediate_outputs=dump_intermediate_outputs, ) @@ -500,6 +505,13 @@ def setup_common_args_and_variables(): type=str, ) + parser.add_argument( + "--online_prepare", + help="If specified, compose QNN graph on device.", + action="store_true", + default=False, + ) + parser.add_argument( "--ip", help="IPC address for delivering execution result",