Skip to content

Commit 4086509

Browse files
authored
Dev weight sharing
Differential Revision: D66012622 Pull Request resolved: #6657
1 parent 76a0e0f commit 4086509

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1317
-595
lines changed

backends/qualcomm/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,10 @@ include_directories(
7474
${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
7575
)
7676

77-
set(_qnn_schema__srcs backends/qualcomm/serialization/schema.fbs)
77+
set(_qnn_schema__srcs
78+
backends/qualcomm/serialization/qc_compiler_spec.fbs
79+
backends/qualcomm/serialization/qc_binary_info.fbs
80+
)
7881
set(_qnn_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
7982
# Paths to headers generated from the .fbs files.
8083
set(_qnn_schema__outputs)

backends/qualcomm/_passes/remove_redundancy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class RemoveRedundancy(ExportPass):
2020
exir_ops.edge.aten.clone.default,
2121
torch.ops.aten.alias.default,
2222
exir_ops.edge.aten.alias.default,
23+
exir_ops.edge.aten.lift_fresh_copy.default,
2324
}
2425

2526
def __init__(self):

backends/qualcomm/aot/ir/qcir.fbs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,13 @@ table Operator {
9494
}
9595

9696
table Graph {
97+
name: string;
9798
nodes: [Operator];
9899
tensors: [Tensor];
99100
}
100101

101-
root_type Graph;
102+
table Context {
103+
graphs: [Graph];
104+
}
105+
106+
root_type Context;

backends/qualcomm/aot/ir/qcir_utils.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,7 @@ flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(
161161
}
162162
} break;
163163
default:
164-
QNN_EXECUTORCH_LOG_WARN(
165-
"QNN_QUANTIZATION_ENCODING_UNDEFINED detected: %s",
166-
QNN_VER_PTR(tensor)->name);
164+
// encodings are not required if lowering with floating point precision
167165
break;
168166
}
169167
return CreateQuantizeParamDirect(
@@ -229,9 +227,7 @@ Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) {
229227
const_cast<int32_t*>(param->offsets()->data());
230228
} break;
231229
default:
232-
QNN_EXECUTORCH_LOG_WARN(
233-
"qcir::QuantizeType::UNDEFINED detected: %s",
234-
tensor->name()->c_str());
230+
// encodings are not required if lowering with floating point precision
235231
break;
236232
}
237233
return p;

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,26 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
3030
py::class_<PyQnnManager, std::shared_ptr<PyQnnManager>>(m, "QnnManager")
3131
.def(py::init<const py::bytes&>())
3232
.def(py::init<const py::bytes&, const py::bytes&>())
33+
.def(py::init<const py::bytes&, const py::list&>())
3334
.def("Init", &PyQnnManager::Init)
3435
.def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
35-
.def("Compile", &PyQnnManager::Compile)
36+
.def("Compile", py::overload_cast<>(&PyQnnManager::Compile))
37+
.def(
38+
"Compile",
39+
py::overload_cast<
40+
const std::string&,
41+
std::vector<std::shared_ptr<OpWrapper>>&>(&PyQnnManager::Compile))
3642
.def("Destroy", &PyQnnManager::Destroy)
3743
.def("IsAvailable", &PyQnnManager::IsAvailable)
3844
.def("IsTensorDump", &PyQnnManager::IsTensorDump)
3945
.def("AllocateTensor", &PyQnnManager::AllocateTensor)
4046
.def("GetGraphInputs", &PyQnnManager::GetGraphInputs)
4147
.def("GetGraphOutputs", &PyQnnManager::GetGraphOutputs)
42-
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize);
48+
.def("GetGraphNames", &PyQnnManager::GetGraphNames)
49+
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
50+
.def(
51+
"MakeBinaryInfo",
52+
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
4353
}
4454
} // namespace qnn
4555
} // namespace backends

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 170 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
#pragma once
99
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
1010
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
11+
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
12+
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
1113
#include <executorch/backends/qualcomm/runtime/Logging.h>
1214
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
1315
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
14-
#include <executorch/backends/qualcomm/schema_generated.h>
1516
#include <pybind11/numpy.h>
1617
#include <pybind11/pybind11.h>
1718
#include <pybind11/stl.h>
@@ -35,32 +36,127 @@ class PyQnnManager {
3536
qnn_manager_ = std::make_shared<QnnManager>(
3637
qnn_executorch_options, qnn_executorch_context_binary_);
3738
}
39+
3840
// used for loading context binary directly
3941
explicit PyQnnManager(const py::bytes& buffer, const py::bytes& ctx_bin)
4042
: qnn_executorch_option_ptr_(buffer) {
4143
auto qnn_executorch_options = GetQnnExecuTorchOptions(
4244
qnn_executorch_option_ptr_.cast<std::string_view>().data());
4345

4446
py::buffer_info info(py::buffer(ctx_bin).request());
45-
qnn_executorch_context_binary_.buffer = static_cast<void*>(info.ptr);
47+
qnn_executorch_context_binary_.buffer = info.ptr;
4648
qnn_executorch_context_binary_.nbytes = info.size * info.itemsize;
4749
qnn_manager_ = std::make_shared<QnnManager>(
4850
qnn_executorch_options, qnn_executorch_context_binary_);
4951
}
5052

53+
// used for loading multiple graphs in qcir
54+
explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
55+
: qnn_executorch_option_ptr_(buffer) {
56+
auto qnn_executorch_options = GetQnnExecuTorchOptions(
57+
qnn_executorch_option_ptr_.cast<std::string_view>().data());
58+
59+
// merge multiple qcirs into one context with multiple graphs
60+
std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
61+
for (size_t i = 0; i < qcirs.size(); ++i) {
62+
py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
63+
flatbuffers::Verifier verifier_binary_info(
64+
static_cast<const uint8_t* const>(info.ptr),
65+
info.size * info.itemsize);
66+
if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
67+
QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info");
68+
return;
69+
}
70+
auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);
71+
72+
flatbuffers::Verifier verifier_qcir(
73+
binary_info->data()->data(), binary_info->data()->size());
74+
if (!qcir::VerifyContextBuffer(verifier_qcir)) {
75+
QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
76+
return;
77+
}
78+
auto context = qcir::GetContext(binary_info->data()->data());
79+
for (const auto& graph : *context->graphs()) {
80+
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
81+
for (const auto tensor : *graph->tensors()) {
82+
// here we need to take a detour to merge multiple qcir flatbuffers
83+
// outer ToTensor
84+
// return: flatbuffers::Offset<Tensor>
85+
// consume: QnnTensor, flatbuffers::FlatBufferBuilder*
86+
// inner ToTensor
87+
// return: QnnTensor
88+
// consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
89+
tensors.emplace_back(ToTensor(ToTensor(tensor), &builder_));
90+
}
91+
std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
92+
for (const auto& node : *graph->nodes()) {
93+
int32_t* inputs_ptr = const_cast<int32_t*>(node->inputs()->data());
94+
int32_t* outputs_ptr = const_cast<int32_t*>(node->outputs()->data());
95+
int32_t* params_ptr = const_cast<int32_t*>(node->params()->data());
96+
std::vector<int32_t> inputs(
97+
inputs_ptr, inputs_ptr + node->inputs()->size());
98+
std::vector<int32_t> outputs(
99+
outputs_ptr, outputs_ptr + node->outputs()->size());
100+
std::vector<int32_t> params(
101+
params_ptr, params_ptr + node->params()->size());
102+
nodes.emplace_back(qcir::CreateOperatorDirect(
103+
builder_,
104+
node->name()->str().c_str(),
105+
node->package_name()->str().c_str(),
106+
node->type_name()->str().c_str(),
107+
&inputs,
108+
&outputs,
109+
&params));
110+
}
111+
graphs.emplace_back(qcir::CreateGraphDirect(
112+
builder_, graph->name()->str().c_str(), &nodes, &tensors));
113+
}
114+
}
115+
116+
auto context = qcir::CreateContextDirect(builder_, &graphs);
117+
builder_.Finish(context);
118+
QnnExecuTorchContextBinary qcir_bin(
119+
{builder_.GetBufferPointer(), builder_.GetSize()});
120+
121+
qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin);
122+
qnn_manager_ = std::make_shared<QnnManager>(
123+
qnn_executorch_options, qnn_executorch_context_binary_);
124+
}
125+
51126
executorch::runtime::Error Init() {
52127
return qnn_manager_->Init();
53128
}
129+
54130
bool IsNodeSupportedByBackend(
55131
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
56132
return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
57133
}
134+
135+
// this method is specific for compiling multi-graphs
136+
py::array_t<char> Compile() {
137+
if (qnn_manager_->CompileQcir() != Error::Ok) {
138+
QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
139+
return py::array_t<char>(0);
140+
}
141+
142+
// generate context binary if compilation succeeded
143+
QnnExecuTorchContextBinary binary_info;
144+
qnn_manager_->GetContextBinary(binary_info);
145+
// allocate py::array (to pass the result of the C++ function to Python)
146+
auto result = py::array_t<char>(binary_info.nbytes);
147+
auto result_buffer = result.request();
148+
char* result_ptr = (char*)result_buffer.ptr;
149+
std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
150+
return result;
151+
}
152+
58153
py::array_t<char> Compile(
154+
const std::string& graph_name,
59155
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60-
QnnExecuTorchContextBinary context_binary;
61-
flatbuffers::FlatBufferBuilder builder;
156+
QnnExecuTorchContextBinary binary_info;
62157

63-
if (qnn_manager_->IsOnlinePrepare()) {
158+
if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
159+
builder_.Reset();
64160
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65161
std::unordered_map<void*, int> tensor_map;
66162

@@ -74,7 +170,7 @@ class PyQnnManager {
74170
tensor_map[wrapper.get()] = i;
75171
index.push_back(i);
76172
tensors.emplace_back(
77-
ToTensor(wrapper->CloneTensorStruct(), &builder));
173+
ToTensor(wrapper->CloneTensorStruct(), &builder_));
78174
}
79175
};
80176

@@ -112,38 +208,48 @@ class PyQnnManager {
112208
QNN_VER_PTR(t)->clientBuf.dataSize =
113209
GetDataTypeSize(QNN_VER_PTR(t)->dataType);
114210
params.push_back(tensors.size());
115-
tensors.emplace_back(ToTensor(t, &builder));
211+
tensors.emplace_back(ToTensor(t, &builder_));
116212
}
117213
}
118214

119215
Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
120216
operators.emplace_back(qcir::CreateOperatorDirect(
121-
builder,
217+
builder_,
122218
QNN_VER_PTR(op_config)->name,
123219
QNN_VER_PTR(op_config)->packageName,
124220
QNN_VER_PTR(op_config)->typeName,
125221
&inputs,
126222
&outputs,
127223
&params));
128224
}
129-
auto graph = qcir::CreateGraphDirect(builder, &operators, &tensors);
130-
builder.Finish(graph);
131-
context_binary.buffer = builder.GetBufferPointer();
132-
context_binary.nbytes = builder.GetSize();
133-
} else if (
134-
qnn_manager_->Compile(op_wrappers, context_binary) !=
135-
executorch::runtime::Error::Ok) {
136-
return py::array_t<char>(0);
225+
auto graph = qcir::CreateGraphDirect(
226+
builder_, graph_name.c_str(), &operators, &tensors);
227+
std::vector<flatbuffers::Offset<qcir::Graph>> graphs({graph});
228+
auto context = qcir::CreateContextDirect(builder_, &graphs);
229+
builder_.Finish(context);
230+
QnnExecuTorchContextBinary qcir_binary(
231+
{builder_.GetBufferPointer(), builder_.GetSize()});
232+
binary_info = MakeBinaryInfo(qcir_binary);
233+
} else {
234+
if (qnn_manager_->Compile(graph_name, op_wrappers) !=
235+
executorch::runtime::Error::Ok) {
236+
QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
237+
return py::array_t<char>(0);
238+
}
239+
if (qnn_manager_->GetContextBinary(binary_info) !=
240+
executorch::runtime::Error::Ok) {
241+
return py::array_t<char>(0);
242+
}
137243
}
138244

139-
// allocate py::array (to pass the result of the C++ function to
140-
// Python)
141-
auto result = py::array_t<char>(context_binary.nbytes);
245+
// allocate py::array (to pass the result of the C++ function to Python)
246+
auto result = py::array_t<char>(binary_info.nbytes);
142247
auto result_buffer = result.request();
143248
char* result_ptr = (char*)result_buffer.ptr;
144-
std::memcpy(result_ptr, context_binary.buffer, context_binary.nbytes);
249+
std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
145250
return result;
146251
}
252+
147253
void Destroy() {
148254
return qnn_manager_->Destroy();
149255
}
@@ -156,38 +262,76 @@ class PyQnnManager {
156262
return qnn_manager_->IsTensorDump();
157263
}
158264

159-
executorch::runtime::Error AllocateTensor() {
160-
return qnn_manager_->AllocateTensor();
265+
executorch::runtime::Error AllocateTensor(const std::string& graph_name) {
266+
return qnn_manager_->AllocateTensor(graph_name);
161267
}
162268

163-
py::list GetGraphInputs() {
269+
py::list GetGraphInputs(const std::string& graph_name) {
164270
py::list ret;
165271
for (const std::shared_ptr<TensorWrapper>& input :
166-
qnn_manager_->GetGraphInputs()) {
272+
qnn_manager_->GetGraphInputs(graph_name)) {
167273
ret.append(PyQnnTensorWrapper(input));
168274
}
169275
return ret;
170276
}
171277

172-
py::list GetGraphOutputs() {
278+
py::list GetGraphOutputs(const std::string& graph_name) {
173279
py::list ret;
174280
for (const std::shared_ptr<TensorWrapper>& output :
175-
qnn_manager_->GetGraphOutputs()) {
281+
qnn_manager_->GetGraphOutputs(graph_name)) {
176282
ret.append(PyQnnTensorWrapper(output));
177283
}
178284
return ret;
179285
}
180286

287+
py::list GetGraphNames() {
288+
py::list ret;
289+
for (const std::string& graph_name : qnn_manager_->GetGraphNames()) {
290+
ret.append(graph_name);
291+
}
292+
return ret;
293+
}
294+
181295
uint64_t GetSpillFillBufferSize() {
182296
return qnn_manager_->GetSpillFillBufferSize();
183297
}
184298

299+
py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
300+
py::buffer_info info(py::buffer(ctx_bin).request());
301+
QnnExecuTorchContextBinary binary(
302+
{info.ptr, static_cast<uint64_t>(info.size * info.itemsize)});
303+
auto binary_info = MakeBinaryInfo(binary);
304+
auto result = py::array_t<char>(binary_info.nbytes);
305+
auto result_buffer = result.request();
306+
std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes);
307+
return result;
308+
}
309+
185310
private:
311+
QnnExecuTorchContextBinary MakeBinaryInfo(
312+
const QnnExecuTorchContextBinary& ctx_bin) {
313+
auto signature = []() {
314+
return std::to_string(
315+
std::chrono::high_resolution_clock::now().time_since_epoch().count());
316+
};
317+
const uint8_t* base = static_cast<uint8_t*>(ctx_bin.buffer);
318+
std::vector<uint8_t> data(base, base + ctx_bin.nbytes);
319+
// add signature to binary for cache reuse in runtime
320+
builder_.Reset();
321+
auto binary_info = qnn_delegate::CreateBinaryInfoDirect(
322+
builder_, signature().c_str(), &data);
323+
builder_.Finish(binary_info);
324+
325+
return QnnExecuTorchContextBinary(
326+
{builder_.GetBufferPointer(), builder_.GetSize()});
327+
}
328+
186329
// Store the bytes object instead of a raw pointer so that this module will
187330
// keep the bytes alive.
188331
const py::bytes qnn_executorch_option_ptr_;
189332
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190333
std::shared_ptr<QnnManager> qnn_manager_;
334+
flatbuffers::FlatBufferBuilder builder_;
191335
};
192336
} // namespace qnn
193337
} // namespace backends

backends/qualcomm/aot/python/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_common_targets():
3131
"//executorch/backends/qualcomm/aot/wrappers:wrappers",
3232
"//executorch/backends/qualcomm/runtime:logging",
3333
"//executorch/backends/qualcomm:schema",
34+
"//executorch/backends/qualcomm:qc_binary_info_schema",
3435
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
3536
"//executorch/backends/qualcomm/runtime:runtime",
3637
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),

0 commit comments

Comments
 (0)