@@ -95,6 +95,34 @@ void write_data_to_file(const std::string& path, void* buf, size_t size) {
   }
 }
 
+void setup_output_storage(
+    executor::Method& method,
+    const std::vector<Span<uint8_t>>& output_storages) {
+  if (output_storages.size() != method.outputs_size()) {
+    THROW_IF_ERROR(
+        Error(),
+        "number of output storages %zu does not match number of outputs %zu",
+        output_storages.size(),
+        method.outputs_size());
+  }
+  for (size_t i = 0; i < output_storages.size(); ++i) {
+    if (output_storages[i].size() == 0) {
+      // Skip empty output storages; this happens for non-tensor outputs.
+      continue;
+    }
+    Error output_status = method.set_output_data_ptr(
+        output_storages[i].data(), output_storages[i].size(), i);
+    // InvalidState can be the status if outputs are already memory planned.
+    // That's fine and we don't need to alert the user to that error.
+    if (output_status != Error::Ok && output_status != Error::InvalidState) {
+      ET_LOG(
+          Error,
+          "Cannot set_output_data_ptr(): 0x%" PRIx32,
+          static_cast<uint32_t>(output_status));
+    }
+  }
+}
+
 using util::BufferDataLoader;
 using util::MallocMemoryAllocator;
 using util::MmapDataLoader;
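Note: the new setup_output_storage() helper takes caller-owned buffers exposed as non-owning spans, where a zero-length span marks a non-tensor output to be skipped. Below is a minimal, self-contained sketch of that span-over-owned-storage pattern; the Span here is a simplified stand-in for the ExecuTorch type and the buffer sizes are placeholders, not values from this commit.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for executorch's Span<T>: a pointer/length view
// that does not own the underlying memory.
template <typename T>
struct Span {
  T* ptr = nullptr;
  size_t len = 0;
  Span() = default;
  Span(T* p, size_t n) : ptr(p), len(n) {}
  T* data() const { return ptr; }
  size_t size() const { return len; }
};

int main() {
  // Caller-owned storage: 64 bytes for a tensor output, and an empty
  // buffer for a non-tensor output (which setup_output_storage() skips).
  std::vector<std::vector<uint8_t>> storages = {
      std::vector<uint8_t>(64), std::vector<uint8_t>()};
  std::vector<Span<uint8_t>> spans(storages.size());
  for (size_t i = 0; i < storages.size(); ++i) {
    spans[i] = Span<uint8_t>(storages[i].data(), storages[i].size());
  }
  for (size_t i = 0; i < spans.size(); ++i) {
    std::printf("output %zu: %zu bytes%s\n", i, spans[i].size(),
                spans[i].size() == 0 ? " (skipped)" : "");
  }
  return 0;
}

The key property is that the spans only view the vectors; whoever builds the spans must keep the backing storage alive for as long as the method may write to it.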
@@ -209,33 +237,19 @@ class Module final {
         c10::autograd_dispatch_keyset);
 #endif
     if (output_storages) {
-      if (output_storages->size() != method->outputs_size()) {
-        THROW_IF_ERROR(
-            Error(),
-            "number of output storages %zu does not match number of outputs %zu",
-            output_storages->size(),
-            method->outputs_size());
-      }
-      for (size_t i = 0; i < output_storages->size(); ++i) {
-        Error output_status = method->set_output_data_ptr(
-            (*output_storages)[i].data(), (*output_storages)[i].size(), i);
-        // InvalidState can be the status if outputs are already memory planned.
-        // That's fine and we don't need to alert the user to that error.
-        if (output_status != Error::Ok &&
-            output_status != Error::InvalidState) {
-          ET_LOG(
-              Error,
-              "Cannot set_output_data_ptr(): 0x%" PRIx32,
-              static_cast<uint32_t>(output_status));
-        }
-      }
+      setup_output_storage(*method, *output_storages);
     }
     Error execute_status = method->execute();
     THROW_IF_ERROR(
         execute_status,
         "method->execute() failed with error 0x%" PRIx32,
         static_cast<uint32_t>(execute_status));
     // process outputs
+    return get_outputs(method_name);
+  }
+
+  std::vector<EValue> get_outputs(const std::string& method_name) {
+    auto& method = methods_[method_name];
     std::vector<EValue> result(method->outputs_size());
 
     Error get_outputs_status =
@@ -556,62 +570,17 @@ struct PyModule final {
 
     const auto& method = module_->get_method(method_name);
     const auto num_outputs = method.outputs_size();
-    // These output storages will not be used if the ExecuTorch program already
-    // pre-allocated output space. That is represented by an error from
-    // set_output_data_ptr.
-    std::vector<std::unique_ptr<uint8_t[]>> output_storages(num_outputs);
+    output_storages_ = make_output_storages(method);
     std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
-    for (size_t i = 0; i < num_outputs; ++i) {
-      const auto& output_tensor_meta =
-          method.method_meta().output_tensor_meta(i);
-      if (!output_tensor_meta.ok()) {
-        // If the output isn't a tensor it won't have a tensor meta.
-        ET_LOG(
-            Info,
-            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
-            ", skipping allocating storage",
-            i,
-            static_cast<uint32_t>(output_tensor_meta.error()));
-        output_storage_spans[i] = Span<uint8_t>();
-        continue;
-      }
-      const size_t output_size = output_tensor_meta.get().nbytes();
-      std::unique_ptr<uint8_t[]> output(new uint8_t[output_size]);
-      output_storage_spans[i] = Span<uint8_t>(output.get(), output_size);
-      output_storages[i] = std::move(output);
+    for (size_t i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
     }
     auto outputs =
         module_->run_method(method_name, cpp_inputs, output_storage_spans);
 
     // Retrieve outputs
-    const auto outputs_size = outputs.size();
-    py::list list(outputs_size);
-    for (size_t i = 0; i < outputs_size; ++i) {
-      auto& v = outputs[i];
-      if (Tag::None == v.tag) {
-        list[i] = py::none();
-      } else if (Tag::Int == v.tag) {
-        list[i] = py::cast(v.toInt());
-      } else if (Tag::Double == v.tag) {
-        list[i] = py::cast(v.toDouble());
-      } else if (Tag::Bool == v.tag) {
-        list[i] = py::cast(v.toBool());
-      } else if (Tag::String == v.tag) {
-        list[i] = py::cast(std::string(v.toString().data()));
-      } else if (Tag::Tensor == v.tag) {
-#ifdef USE_ATEN_LIB
-        // Clone so the outputs in python do not share a lifetime with the
-        // module object.
-        list[i] = py::cast(v.toTensor().clone());
-#else
-        list[i] = py::cast(
-            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
-#endif
-      } else {
-        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
-      }
-    }
-    return list;
+    return get_outputs_as_py_list(outputs);
   }
 
   py::list forward(const py::sequence& inputs) {
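Note: the conversion loop removed above reappears later in this diff as the get_outputs_as_py_list() helper, which branches on each EValue's tag. As a rough, self-contained analogue of that tag dispatch, here is a sketch using std::variant in place of the real tagged EValue; the names are illustrative, not ExecuTorch API, and tensors are omitted to keep it compilable on its own.

#include <cstdint>
#include <cstdio>
#include <string>
#include <variant>
#include <vector>

// Stand-in for a tagged EValue holding the scalar kinds the binding
// converts (None, Int, Double, Bool, String).
using Value = std::variant<std::monostate, int64_t, double, bool, std::string>;

// Convert one tagged value to a printable form, mirroring the
// tag-by-tag branching in get_outputs_as_py_list().
std::string to_display(const Value& v) {
  switch (v.index()) {
    case 0: return "None";
    case 1: return std::to_string(std::get<int64_t>(v));
    case 2: return std::to_string(std::get<double>(v));
    case 3: return std::get<bool>(v) ? "True" : "False";
    case 4: return std::get<std::string>(v);
    default: return "<invalid output type>";
  }
}

int main() {
  const std::vector<Value> outputs = {
      Value{}, Value{int64_t{7}}, Value{3.5}, Value{true},
      Value{std::string("done")}};
  for (const auto& v : outputs) {
    std::printf("%s\n", to_display(v).c_str());
  }
  return 0;
}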
@@ -670,35 +639,113 @@ struct PyModule final {
         static_cast<uint32_t>(status));
   }
 
-  void verify_result_with_bundled_expected_output(
+  py::list verify_result_with_bundled_expected_output(
       PyBundledModule& m,
       const string method_name,
       size_t testset_idx,
      double rtol = 1e-5,
      double atol = 1e-8) {
     const void* bundled_program_ptr = m.get_bundled_program_ptr();
-    Error status = bundled_program::VerifyResultWithBundledExpectedOutput(
-        module_->get_method(method_name),
-        bundled_program_ptr,
-        testset_idx,
-        rtol,
-        atol);
+    auto& method = module_->get_method(method_name);
+    Error status = bundled_program::LoadBundledInput(
+        method, bundled_program_ptr, testset_idx);
+    THROW_IF_ERROR(
+        status,
+        "LoadBundledInput failed with status %" PRIu32,
+        static_cast<uint32_t>(status));
+    py::list outputs = plan_execute(method_name);
+    status = bundled_program::VerifyResultWithBundledExpectedOutput(
+        method, bundled_program_ptr, testset_idx, rtol, atol);
     THROW_IF_ERROR(
         status,
         "Result verification failed with status %" PRIu32,
         static_cast<uint32_t>(status));
+    return outputs;
   }
 
-  void plan_execute(const string method_name) {
-    auto status = module_->get_method(method_name).execute();
+  py::list plan_execute(const string method_name) {
+    auto& method = module_->get_method(method_name);
+    // Need to pre-allocate space for outputs just like in run_method.
+    const auto num_outputs = method.outputs_size();
+    output_storages_ = make_output_storages(method);
+    std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
+    for (size_t i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
+    }
+    setup_output_storage(method, output_storage_spans);
+    auto status = method.execute();
     THROW_IF_ERROR(
         status,
         "executing execution plan for method 'forward' failed with error: 0x%" PRIx32,
         static_cast<uint32_t>(status));
+    const auto outputs = module_->get_outputs(method_name);
+    return get_outputs_as_py_list(outputs);
+  }
+
+  py::list get_outputs_as_py_list(const std::vector<EValue>& outputs) {
+    const auto outputs_size = outputs.size();
+    py::list list(outputs_size);
+    for (size_t i = 0; i < outputs_size; ++i) {
+      auto& v = outputs[i];
+      if (Tag::None == v.tag) {
+        list[i] = py::none();
+      } else if (Tag::Int == v.tag) {
+        list[i] = py::cast(v.toInt());
+      } else if (Tag::Double == v.tag) {
+        list[i] = py::cast(v.toDouble());
+      } else if (Tag::Bool == v.tag) {
+        list[i] = py::cast(v.toBool());
+      } else if (Tag::String == v.tag) {
+        list[i] = py::cast(std::string(v.toString().data()));
+      } else if (Tag::Tensor == v.tag) {
+#ifdef USE_ATEN_LIB
+        // Clone so the outputs in python do not share a lifetime with the
+        // module object.
+        list[i] = py::cast(v.toTensor().clone());
+#else
+        list[i] = py::cast(
+            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
+#endif
+      } else {
+        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
+      }
+    }
+    return list;
   }
 
  private:
   std::unique_ptr<Module> module_;
+  // Need to keep the output storages alive until they can be compared, in the
+  // case of bundled programs.
+  std::vector<std::vector<uint8_t>> output_storages_;
+
+  std::vector<std::vector<uint8_t>> make_output_storages(
+      const executor::Method& method) {
+    const auto num_outputs = method.outputs_size();
+    // These output storages will not be used if the ExecuTorch program already
+    // pre-allocated output space. That is represented by an error from
+    // set_output_data_ptr.
+    std::vector<std::vector<uint8_t>> output_storages(num_outputs);
+    for (size_t i = 0; i < num_outputs; ++i) {
+      const auto& output_tensor_meta =
+          method.method_meta().output_tensor_meta(i);
+      if (!output_tensor_meta.ok()) {
+        // If the output isn't a tensor it won't have a tensor meta.
+        ET_LOG(
+            Error,
+            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
+            ", skipping allocating storage",
+            i,
+            static_cast<uint32_t>(output_tensor_meta.error()));
+        output_storages[i] = std::vector<uint8_t>();
+        continue;
+      }
+      const size_t output_size = output_tensor_meta.get().nbytes();
+      output_storages[i] = std::vector<uint8_t>(output_size);
+    }
+    return output_storages;
+  }
 };
 
 void create_profile_block(const std::string& name) {
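Note: make_output_storages() allocates one heap buffer per output, sized from the method's output tensor metadata, and the buffers are held in the output_storages_ member so they outlive execute() and can still be compared during bundled-program verification. A compact sketch of that allocation strategy under simplified assumptions follows; the metadata is faked with a plain list of sizes here, whereas the real code reads nbytes() from the method's tensor meta.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// One owned buffer per output; a zero-byte entry means "no storage
// allocated" (a non-tensor output), which downstream code skips.
std::vector<std::vector<uint8_t>> make_storages(
    const std::vector<size_t>& output_nbytes) {
  std::vector<std::vector<uint8_t>> storages(output_nbytes.size());
  for (size_t i = 0; i < output_nbytes.size(); ++i) {
    storages[i] = std::vector<uint8_t>(output_nbytes[i]);
  }
  return storages;
}

int main() {
  // e.g. a float32 tensor of shape [2, 3] needs 2 * 3 * 4 = 24 bytes;
  // the second output stands in for a non-tensor and gets no storage.
  const auto storages = make_storages({24, 0});
  for (size_t i = 0; i < storages.size(); ++i) {
    std::printf("output %zu: %zu bytes\n", i, storages[i].size());
  }
  return 0;
}

Using std::vector<uint8_t> instead of the previous std::unique_ptr<uint8_t[]> keeps the size together with the pointer, which is what lets the span-building loops above ask each storage for its own size.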