Commit a314db7

dulinriley authored and facebook-github-bot committed
Fix bundled program and plan_execute in pybindings (#4595)
Summary:
Pull Request resolved: #4595

Bundled programs needed to use the `plan_execute` function on ExecutorchModule because the inputs were previously loaded. Unfortunately, this meant the programs didn't work if they did not have pre-planned outputs (i.e., the caller is supposed to provide the memory).

Share code for allocating outputs as necessary between `run_method` and `plan_execute`. Also share code for *returning* those outputs, as there doesn't seem to be a reason to execute a method without getting its output. In order for the output storage to live long enough for the bundled-program comparison, add the storages as private data members of `PyModule`.

As an improvement to the user API, have `verify_result_with_bundled_expected_output` do all of these things at once for the user.

Differential Revision: D60939315
1 parent 79c15ef commit a314db7
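
For context, a minimal sketch of the user-facing flow this change enables, in Python. The loader helper names (`_load_bundled_program_from_buffer`, `_load_for_executorch_from_bundled_program`) and the module path are assumptions about the pybindings build; the method calls themselves follow the `.pyi` stubs changed below.

```python
# Sketch only: the loader helper names below are assumed; adjust to your build.
from executorch.extension.pybindings.portable_lib import (
    _load_bundled_program_from_buffer,
    _load_for_executorch_from_bundled_program,
)

with open("model_bundled.bpte", "rb") as f:
    bundled = _load_bundled_program_from_buffer(f.read())
module = _load_for_executorch_from_bundled_program(bundled)

# After this fix, a single call loads the bundled inputs for the test set,
# allocates output storage when the program has no pre-planned outputs,
# executes the method, verifies against the bundled expected outputs, and
# returns the actual outputs (previously it returned None).
outputs = module.verify_result_with_bundled_expected_output(bundled, "forward", 0)
print(outputs)
```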

File tree

extension/pybindings/pybindings.cpp
extension/pybindings/pybindings.pyi

2 files changed: +130 −80 lines

extension/pybindings/pybindings.cpp (+126 −79)

@@ -95,6 +95,34 @@ void write_data_to_file(const std::string& path, void* buf, size_t size) {
   }
 }
 
+void setup_output_storage(
+    executor::Method& method,
+    const std::vector<Span<uint8_t>>& output_storages) {
+  if (output_storages.size() != method.outputs_size()) {
+    THROW_IF_ERROR(
+        Error(),
+        "number of output storages %zu does not match number of outputs %zu",
+        output_storages.size(),
+        method.outputs_size());
+  }
+  for (size_t i = 0; i < output_storages.size(); ++i) {
+    if (output_storages[i].size() == 0) {
+      // Skip empty output storages, this would happen for non-tensor outputs.
+      continue;
+    }
+    Error output_status = method.set_output_data_ptr(
+        output_storages[i].data(), output_storages[i].size(), i);
+    // InvalidState can be the status if outputs are already memory planned.
+    // That's fine and we don't need to alert the user to that error.
+    if (output_status != Error::Ok && output_status != Error::InvalidState) {
+      ET_LOG(
+          Error,
+          "Cannot set_output_data_ptr(): 0x%" PRIx32,
+          static_cast<uint32_t>(output_status));
+    }
+  }
+}
+
 using util::BufferDataLoader;
 using util::MallocMemoryAllocator;
 using util::MmapDataLoader;
@@ -209,33 +237,19 @@ class Module final {
         c10::autograd_dispatch_keyset);
 #endif
     if (output_storages) {
-      if (output_storages->size() != method->outputs_size()) {
-        THROW_IF_ERROR(
-            Error(),
-            "number of output storages %zu does not match number of outputs %zu",
-            output_storages->size(),
-            method->outputs_size());
-      }
-      for (size_t i = 0; i < output_storages->size(); ++i) {
-        Error output_status = method->set_output_data_ptr(
-            (*output_storages)[i].data(), (*output_storages)[i].size(), i);
-        // InvalidState can be the status if outputs are already memory planned.
-        // That's fine and we don't need to alert the user to that error.
-        if (output_status != Error::Ok &&
-            output_status != Error::InvalidState) {
-          ET_LOG(
-              Error,
-              "Cannot set_output_data_ptr(): 0x%" PRIx32,
-              static_cast<uint32_t>(output_status));
-        }
-      }
+      setup_output_storage(*method, *output_storages);
     }
     Error execute_status = method->execute();
     THROW_IF_ERROR(
         execute_status,
         "method->execute() failed with error 0x%" PRIx32,
         static_cast<uint32_t>(execute_status));
     // process outputs
+    return get_outputs(method_name);
+  }
+
+  std::vector<EValue> get_outputs(const std::string& method_name) {
+    auto& method = methods_[method_name];
     std::vector<EValue> result(method->outputs_size());
 
     Error get_outputs_status =
@@ -556,62 +570,17 @@ struct PyModule final {
 
     const auto& method = module_->get_method(method_name);
     const auto num_outputs = method.outputs_size();
-    // These output storages will not be used if the ExecuTorch program already
-    // pre-allocated output space. That is represented by an error from
-    // set_output_data_ptr.
-    std::vector<std::unique_ptr<uint8_t[]>> output_storages(num_outputs);
+    output_storages_ = make_output_storages(method);
     std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
-    for (size_t i = 0; i < num_outputs; ++i) {
-      const auto& output_tensor_meta =
-          method.method_meta().output_tensor_meta(i);
-      if (!output_tensor_meta.ok()) {
-        // If the output isn't a tensor it won't have a tensor meta.
-        ET_LOG(
-            Info,
-            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
-            ", skipping allocating storage",
-            i,
-            static_cast<uint32_t>(output_tensor_meta.error()));
-        output_storage_spans[i] = Span<uint8_t>();
-        continue;
-      }
-      const size_t output_size = output_tensor_meta.get().nbytes();
-      std::unique_ptr<uint8_t[]> output(new uint8_t[output_size]);
-      output_storage_spans[i] = Span<uint8_t>(output.get(), output_size);
-      output_storages[i] = std::move(output);
+    for (int i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
     }
     auto outputs =
         module_->run_method(method_name, cpp_inputs, output_storage_spans);
 
     // Retrieve outputs
-    const auto outputs_size = outputs.size();
-    py::list list(outputs_size);
-    for (size_t i = 0; i < outputs_size; ++i) {
-      auto& v = outputs[i];
-      if (Tag::None == v.tag) {
-        list[i] = py::none();
-      } else if (Tag::Int == v.tag) {
-        list[i] = py::cast(v.toInt());
-      } else if (Tag::Double == v.tag) {
-        list[i] = py::cast(v.toDouble());
-      } else if (Tag::Bool == v.tag) {
-        list[i] = py::cast(v.toBool());
-      } else if (Tag::String == v.tag) {
-        list[i] = py::cast(std::string(v.toString().data()));
-      } else if (Tag::Tensor == v.tag) {
-#ifdef USE_ATEN_LIB
-        // Clone so the outputs in python do not share a lifetime with the
-        // module object
-        list[i] = py::cast(v.toTensor().clone());
-#else
-        list[i] = py::cast(
-            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
-#endif
-      } else {
-        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
-      }
-    }
-    return list;
+    return get_outputs_as_py_list(outputs);
   }
 
   py::list forward(const py::sequence& inputs) {
@@ -670,35 +639,113 @@ struct PyModule final {
         static_cast<uint32_t>(status));
   }
 
-  void verify_result_with_bundled_expected_output(
+  py::list verify_result_with_bundled_expected_output(
       PyBundledModule& m,
       const string method_name,
      size_t testset_idx,
       double rtol = 1e-5,
       double atol = 1e-8) {
     const void* bundled_program_ptr = m.get_bundled_program_ptr();
-    Error status = bundled_program::VerifyResultWithBundledExpectedOutput(
-        module_->get_method(method_name),
-        bundled_program_ptr,
-        testset_idx,
-        rtol,
-        atol);
+    auto& method = module_->get_method(method_name);
+    Error status = bundled_program::LoadBundledInput(
+        method, bundled_program_ptr, testset_idx);
+    THROW_IF_ERROR(
+        status,
+        "LoadBundledInput failed with status %" PRIu32,
+        static_cast<uint32_t>(status));
+    py::list outputs = plan_execute(method_name);
+    status = bundled_program::VerifyResultWithBundledExpectedOutput(
+        method, bundled_program_ptr, testset_idx, rtol, atol);
     THROW_IF_ERROR(
         status,
         "Result verification failed with status %" PRIu32,
         static_cast<uint32_t>(status));
+    return outputs;
   }
 
-  void plan_execute(const string method_name) {
-    auto status = module_->get_method(method_name).execute();
+  py::list plan_execute(const string method_name) {
+    auto& method = module_->get_method(method_name);
+    // Need to pre-allocate space for outputs just like in run_method.
+    const auto num_outputs = method.outputs_size();
+    output_storages_ = make_output_storages(method);
+    std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
+    for (int i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
+    }
+    setup_output_storage(method, output_storage_spans);
+    auto status = method.execute();
     THROW_IF_ERROR(
         status,
         "executing execution plan for method 'forward' failed with error: 0x%" PRIx32,
         static_cast<uint32_t>(status));
+    const auto outputs = module_->get_outputs(method_name);
+    return get_outputs_as_py_list(outputs);
+  }
+
+  py::list get_outputs_as_py_list(const std::vector<EValue>& outputs) {
+    const auto outputs_size = outputs.size();
+    py::list list(outputs_size);
+    for (size_t i = 0; i < outputs_size; ++i) {
+      auto& v = outputs[i];
+      if (Tag::None == v.tag) {
+        list[i] = py::none();
+      } else if (Tag::Int == v.tag) {
+        list[i] = py::cast(v.toInt());
+      } else if (Tag::Double == v.tag) {
+        list[i] = py::cast(v.toDouble());
+      } else if (Tag::Bool == v.tag) {
+        list[i] = py::cast(v.toBool());
+      } else if (Tag::String == v.tag) {
+        list[i] = py::cast(std::string(v.toString().data()));
+      } else if (Tag::Tensor == v.tag) {
+#ifdef USE_ATEN_LIB
+        // Clone so the outputs in python do not share a lifetime with the
+        // module object
+        list[i] = py::cast(v.toTensor().clone());
+#else
+        list[i] = py::cast(
+            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
+#endif
+      } else {
+        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
+      }
+    }
+    return list;
+  }
 
  private:
   std::unique_ptr<Module> module_;
+  // Need to keep-alive output storages until they can be compared in case of
+  // bundled programs.
+  std::vector<std::vector<uint8_t>> output_storages_;
+
+  std::vector<std::vector<uint8_t>> make_output_storages(
+      const executor::Method& method) {
+    const auto num_outputs = method.outputs_size();
+    // These output storages will not be used if the ExecuTorch program already
+    // pre-allocated output space. That is represented by an error from
+    // set_output_data_ptr.
+    std::vector<std::vector<uint8_t>> output_storages(num_outputs);
+    for (size_t i = 0; i < num_outputs; ++i) {
+      const auto& output_tensor_meta =
+          method.method_meta().output_tensor_meta(i);
+      if (!output_tensor_meta.ok()) {
+        // If the output isn't a tensor it won't have a tensor meta.
+        ET_LOG(
+            Error,
+            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
+            ", skipping allocating storage",
+            i,
+            static_cast<uint32_t>(output_tensor_meta.error()));
+        output_storages[i] = std::vector<uint8_t>();
+        continue;
+      }
+      const size_t output_size = output_tensor_meta.get().nbytes();
+      output_storages[i] = std::vector<uint8_t>(output_size);
+    }
+    return output_storages;
+  }
 };
 
 void create_profile_block(const std::string& name) {
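
A short usage sketch of the reworked `plan_execute` path above, assuming `module` and `bundled` were loaded as in the earlier example. Per the C++ signature, the method name is passed explicitly; output storage is now allocated internally via `make_output_storages` and kept alive on `PyModule`.

```python
# Stage the bundled inputs for test set 0, then execute the plan.
module.load_bundled_input(bundled, "forward", 0)

# plan_execute now allocates output storage for programs without
# pre-planned outputs, runs the method, and returns the outputs as a
# Python list (it previously returned nothing).
outputs = module.plan_execute("forward")
for i, out in enumerate(outputs):
    print(f"output {i}: {type(out).__name__}")
```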

extension/pybindings/pybindings.pyi (+4 −1)

@@ -14,18 +14,21 @@ class ExecuTorchModule:
     def run_method(self, method_name: str, inputs: Sequence[Any]) -> List[Any]: ...
     # pyre-ignore[2, 3]: "Any" in parameter and return type annotations.
     def forward(self, inputs: Sequence[Any]) -> List[Any]: ...
+    # pyre-ignore[3]: "Any" in return type annotations.
+    def plan_execute(self) -> List[Any]: ...
     # Bundled program methods.
     def load_bundled_input(
         self, bundle: BundledModule, method_name: str, testset_idx: int
     ) -> None: ...
+    # pyre-ignore[3]: "Any" in return type annotations.
     def verify_result_with_bundled_expected_output(
         self,
         bundle: BundledModule,
         method_name: str,
         testset_idx: int,
         rtol: float = 1e-5,
         atol: float = 1e-8,
-    ) -> None: ...
+    ) -> List[Any]: ...
     def has_etdump(self) -> bool: ...
     def write_etdump_result_to_file(
         self, path: str, debug_buffer_path: Optional[str] = None
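
A sketch of consuming the new return type declared above: `verify_result_with_bundled_expected_output` now yields `List[Any]` instead of `None`, so callers can inspect the verified outputs directly (same assumed `module` and `bundled` as in the earlier examples).

```python
outputs = module.verify_result_with_bundled_expected_output(
    bundled,   # BundledModule carrying the inputs and expected outputs
    "forward",
    0,         # testset_idx; rtol/atol keep their 1e-5 / 1e-8 defaults
)
assert isinstance(outputs, list)
# Tensor outputs are cloned on the C++ side, so they do not share a
# lifetime with the module's internal output storage.
for i, out in enumerate(outputs):
    print(f"verified output {i}: {out}")
```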
