8
8
#pragma once
9
9
#include < executorch/backends/qualcomm/aot/ir/qcir_utils.h>
10
10
#include < executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
11
+ #include < executorch/backends/qualcomm/qc_binary_info_generated.h>
12
+ #include < executorch/backends/qualcomm/qc_compiler_spec_generated.h>
11
13
#include < executorch/backends/qualcomm/runtime/Logging.h>
12
14
#include < executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
13
15
#include < executorch/backends/qualcomm/runtime/QnnManager.h>
14
- #include < executorch/backends/qualcomm/schema_generated.h>
15
16
#include < pybind11/numpy.h>
16
17
#include < pybind11/pybind11.h>
17
18
#include < pybind11/stl.h>
@@ -35,32 +36,127 @@ class PyQnnManager {
35
36
qnn_manager_ = std::make_shared<QnnManager>(
36
37
qnn_executorch_options, qnn_executorch_context_binary_);
37
38
}
39
+
38
40
  // used for loading context binary directly
  //
  // `buffer` holds the serialized QnnExecuTorchOptions; `ctx_bin` is a
  // prebuilt QNN context binary.
  // NOTE(review): qnn_executorch_context_binary_.buffer aliases `ctx_bin`'s
  // storage, which is NOT retained by this object (only `buffer` is kept
  // alive via qnn_executorch_option_ptr_) — the caller must keep `ctx_bin`
  // alive for the manager's lifetime; confirm with callers.
  explicit PyQnnManager(const py::bytes& buffer, const py::bytes& ctx_bin)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    // borrow the context binary's raw bytes without copying
    py::buffer_info info(py::buffer(ctx_bin).request());
    qnn_executorch_context_binary_.buffer = info.ptr;
    qnn_executorch_context_binary_.nbytes = info.size * info.itemsize;
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }
50
52
53
  // used for loading multiple graphs in qcir
  //
  // Each element of `qcirs` is a python bytes object containing a
  // BinaryInfo flatbuffer that wraps a qcir Context. All graphs from all
  // inputs are re-serialized into one merged Context (via builder_), which
  // is then wrapped in a fresh BinaryInfo and handed to QnnManager.
  // NOTE(review): on a verification failure this returns early, leaving
  // qnn_manager_ null — callers presumably treat the object as unusable
  // in that case; confirm.
  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    // merge multiple qcirs into one context with multiple graphs
    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
    for (size_t i = 0; i < qcirs.size(); ++i) {
      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());

      // outer layer: verify the BinaryInfo wrapper
      flatbuffers::Verifier verifier_binary_info(
          static_cast<const uint8_t* const>(info.ptr),
          info.size * info.itemsize);
      if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
        QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info");
        return;
      }
      auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);

      // inner layer: verify the qcir Context payload
      flatbuffers::Verifier verifier_qcir(
          binary_info->data()->data(), binary_info->data()->size());
      if (!qcir::VerifyContextBuffer(verifier_qcir)) {
        QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
        return;
      }
      auto context = qcir::GetContext(binary_info->data()->data());
      for (const auto& graph : *context->graphs()) {
        // deep-copy every tensor into builder_ (offsets from one flatbuffer
        // cannot be reused in another)
        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
        for (const auto tensor : *graph->tensors()) {
          // here we need to take a detour to merge multiple qcir flatbuffers
          // outer ToTensor
          //   return: flatbuffers::Offset<Tensor>
          //   consume: QnnTensor, flatbuffers::FlatBufferBuilder*
          // inner ToTensor
          //   return: QnnTensor
          //   consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
          tensors.emplace_back(ToTensor(ToTensor(tensor), &builder_));
        }
        // deep-copy every operator, rebuilding its index vectors
        std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
        for (const auto& node : *graph->nodes()) {
          int32_t* inputs_ptr = const_cast<int32_t*>(node->inputs()->data());
          int32_t* outputs_ptr = const_cast<int32_t*>(node->outputs()->data());
          int32_t* params_ptr = const_cast<int32_t*>(node->params()->data());
          std::vector<int32_t> inputs(
              inputs_ptr, inputs_ptr + node->inputs()->size());
          std::vector<int32_t> outputs(
              outputs_ptr, outputs_ptr + node->outputs()->size());
          std::vector<int32_t> params(
              params_ptr, params_ptr + node->params()->size());
          nodes.emplace_back(qcir::CreateOperatorDirect(
              builder_,
              node->name()->str().c_str(),
              node->package_name()->str().c_str(),
              node->type_name()->str().c_str(),
              &inputs,
              &outputs,
              &params));
        }
        graphs.emplace_back(qcir::CreateGraphDirect(
            builder_, graph->name()->str().c_str(), &nodes, &tensors));
      }
    }

    // finish the merged context; qcir_bin points into builder_'s buffer
    auto context = qcir::CreateContextDirect(builder_, &graphs);
    builder_.Finish(context);
    QnnExecuTorchContextBinary qcir_bin(
        {builder_.GetBufferPointer(), builder_.GetSize()});

    // MakeBinaryInfo copies qcir_bin's bytes before resetting builder_,
    // so this is safe even though qcir_bin aliases builder_'s storage.
    qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin);
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }
125
+
51
126
  // Forwarding wrapper over QnnManager::Init.
  executorch::runtime::Error Init() {
    return qnn_manager_->Init();
  }
129
+
54
130
  // Forwarding wrapper: ask the backend whether every op in `op_wrappers`
  // is supported (used by the partitioner on the python side).
  bool IsNodeSupportedByBackend(
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
    return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
  }
134
+
135
+ // this method is specific for compiling multi-graphs
136
+ py::array_t <char > Compile () {
137
+ if (qnn_manager_->CompileQcir () != Error::Ok) {
138
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
139
+ return py::array_t <char >(0 );
140
+ }
141
+
142
+ // generate context binary if compilation succeded
143
+ QnnExecuTorchContextBinary binary_info;
144
+ qnn_manager_->GetContextBinary (binary_info);
145
+ // allocate py::array (to pass the result of the C++ function to Python)
146
+ auto result = py::array_t <char >(binary_info.nbytes );
147
+ auto result_buffer = result.request ();
148
+ char * result_ptr = (char *)result_buffer.ptr ;
149
+ std::memcpy (result_ptr, binary_info.buffer , binary_info.nbytes );
150
+ return result;
151
+ }
152
+
58
153
py::array_t <char > Compile (
154
+ const std::string& graph_name,
59
155
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60
- QnnExecuTorchContextBinary context_binary;
61
- flatbuffers::FlatBufferBuilder builder;
156
+ QnnExecuTorchContextBinary binary_info;
62
157
63
- if (qnn_manager_->IsOnlinePrepare ()) {
158
+ if (qnn_manager_->IsOnlinePrepare () || qnn_manager_->IsMultipleGraphs ()) {
159
+ builder_.Reset ();
64
160
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65
161
std::unordered_map<void *, int > tensor_map;
66
162
@@ -74,7 +170,7 @@ class PyQnnManager {
74
170
tensor_map[wrapper.get ()] = i;
75
171
index .push_back (i);
76
172
tensors.emplace_back (
77
- ToTensor (wrapper->CloneTensorStruct (), &builder ));
173
+ ToTensor (wrapper->CloneTensorStruct (), &builder_ ));
78
174
}
79
175
};
80
176
@@ -112,38 +208,48 @@ class PyQnnManager {
112
208
QNN_VER_PTR (t)->clientBuf .dataSize =
113
209
GetDataTypeSize (QNN_VER_PTR (t)->dataType );
114
210
params.push_back (tensors.size ());
115
- tensors.emplace_back (ToTensor (t, &builder ));
211
+ tensors.emplace_back (ToTensor (t, &builder_ ));
116
212
}
117
213
}
118
214
119
215
Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig ();
120
216
operators.emplace_back (qcir::CreateOperatorDirect (
121
- builder ,
217
+ builder_ ,
122
218
QNN_VER_PTR (op_config)->name ,
123
219
QNN_VER_PTR (op_config)->packageName ,
124
220
QNN_VER_PTR (op_config)->typeName ,
125
221
&inputs,
126
222
&outputs,
127
223
¶ms));
128
224
}
129
- auto graph = qcir::CreateGraphDirect (builder, &operators, &tensors);
130
- builder.Finish (graph);
131
- context_binary.buffer = builder.GetBufferPointer ();
132
- context_binary.nbytes = builder.GetSize ();
133
- } else if (
134
- qnn_manager_->Compile (op_wrappers, context_binary) !=
135
- executorch::runtime::Error::Ok) {
136
- return py::array_t <char >(0 );
225
+ auto graph = qcir::CreateGraphDirect (
226
+ builder_, graph_name.c_str (), &operators, &tensors);
227
+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs ({graph});
228
+ auto context = qcir::CreateContextDirect (builder_, &graphs);
229
+ builder_.Finish (context);
230
+ QnnExecuTorchContextBinary qcir_binary (
231
+ {builder_.GetBufferPointer (), builder_.GetSize ()});
232
+ binary_info = MakeBinaryInfo (qcir_binary);
233
+ } else {
234
+ if (qnn_manager_->Compile (graph_name, op_wrappers) !=
235
+ executorch::runtime::Error::Ok) {
236
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile QNN graph" );
237
+ return py::array_t <char >(0 );
238
+ }
239
+ if (qnn_manager_->GetContextBinary (binary_info) !=
240
+ executorch::runtime::Error::Ok) {
241
+ return py::array_t <char >(0 );
242
+ }
137
243
}
138
244
139
- // allocate py::array (to pass the result of the C++ function to
140
- // Python)
141
- auto result = py::array_t <char >(context_binary.nbytes );
245
+ // allocate py::array (to pass the result of the C++ function to Python)
246
+ auto result = py::array_t <char >(binary_info.nbytes );
142
247
auto result_buffer = result.request ();
143
248
char * result_ptr = (char *)result_buffer.ptr ;
144
- std::memcpy (result_ptr, context_binary .buffer , context_binary .nbytes );
249
+ std::memcpy (result_ptr, binary_info .buffer , binary_info .nbytes );
145
250
return result;
146
251
}
252
+
147
253
  // Forwarding wrapper over QnnManager::Destroy (tears down manager state).
  void Destroy() {
    return qnn_manager_->Destroy();
  }
@@ -156,38 +262,76 @@ class PyQnnManager {
156
262
return qnn_manager_->IsTensorDump ();
157
263
}
158
264
159
  // Forwarding wrapper: allocate the I/O tensors for the graph named
  // `graph_name`.
  executorch::runtime::Error AllocateTensor(const std::string& graph_name) {
    return qnn_manager_->AllocateTensor(graph_name);
  }
162
268
163
- py::list GetGraphInputs () {
269
+ py::list GetGraphInputs (const std::string& graph_name ) {
164
270
py::list ret;
165
271
for (const std::shared_ptr<TensorWrapper>& input :
166
- qnn_manager_->GetGraphInputs ()) {
272
+ qnn_manager_->GetGraphInputs (graph_name )) {
167
273
ret.append (PyQnnTensorWrapper (input));
168
274
}
169
275
return ret;
170
276
}
171
277
172
- py::list GetGraphOutputs () {
278
+ py::list GetGraphOutputs (const std::string& graph_name ) {
173
279
py::list ret;
174
280
for (const std::shared_ptr<TensorWrapper>& output :
175
- qnn_manager_->GetGraphOutputs ()) {
281
+ qnn_manager_->GetGraphOutputs (graph_name )) {
176
282
ret.append (PyQnnTensorWrapper (output));
177
283
}
178
284
return ret;
179
285
}
180
286
287
+ py::list GetGraphNames () {
288
+ py::list ret;
289
+ for (const std::string& graph_name : qnn_manager_->GetGraphNames ()) {
290
+ ret.append (graph_name);
291
+ }
292
+ return ret;
293
+ }
294
+
181
295
  // Forwarding wrapper over QnnManager::GetSpillFillBufferSize (size in
  // bytes reported by the underlying manager).
  uint64_t GetSpillFillBufferSize() {
    return qnn_manager_->GetSpillFillBufferSize();
  }
184
298
299
  // Wrap a raw context binary (python bytes) in a BinaryInfo flatbuffer and
  // return the serialized result as a py::array_t<char> copy.
  // NOTE: delegates to the private MakeBinaryInfo overload, which resets
  // builder_ — any buffer previously obtained from builder_ is invalidated.
  py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
    py::buffer_info info(py::buffer(ctx_bin).request());
    QnnExecuTorchContextBinary binary(
        {info.ptr, static_cast<uint64_t>(info.size * info.itemsize)});
    auto binary_info = MakeBinaryInfo(binary);
    // copy out of builder_'s storage before returning to python
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }
309
+
185
310
private:
311
  // Serialize `ctx_bin`'s bytes into a BinaryInfo flatbuffer tagged with a
  // clock-derived signature (used for cache reuse in runtime).
  // The returned binary points into builder_'s storage and is only valid
  // until the next builder_.Reset()/Finish().
  QnnExecuTorchContextBinary MakeBinaryInfo(
      const QnnExecuTorchContextBinary& ctx_bin) {
    auto signature = []() {
      return std::to_string(
          std::chrono::high_resolution_clock::now().time_since_epoch().count());
    };
    // copy the payload FIRST: ctx_bin may alias builder_'s buffer, which the
    // Reset() below would invalidate
    const uint8_t* base = static_cast<uint8_t*>(ctx_bin.buffer);
    std::vector<uint8_t> data(base, base + ctx_bin.nbytes);
    // add signature to binary for cache reuse in runtime
    builder_.Reset();
    auto binary_info = qnn_delegate::CreateBinaryInfoDirect(
        builder_, signature().c_str(), &data);
    builder_.Finish(binary_info);

    return QnnExecuTorchContextBinary(
        {builder_.GetBufferPointer(), builder_.GetSize()});
  }
328
+
186
329
// Store the bytes object instead of a raw pointer so that this module will
187
330
// keep the bytes alive.
188
331
const py::bytes qnn_executorch_option_ptr_;
189
332
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190
333
std::shared_ptr<QnnManager> qnn_manager_;
334
+ flatbuffers::FlatBufferBuilder builder_;
191
335
};
192
336
} // namespace qnn
193
337
} // namespace backends
0 commit comments