Skip to content

Commit 19154af

Browse files
MatthiasKreileder authored and copybara-github committed
18/N NPU executor: Further cleanup
LiteRT-LM-PiperOrigin-RevId: 775276323
1 parent caaf3fd commit 19154af

File tree

4 files changed

+13
-24
lines changed

4 files changed

+13
-24
lines changed

runtime/core/engine_impl.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ class EngineImpl : public Engine {
119119

120120
std::filesystem::path path(model_path);
121121
ABSL_CHECK(std::filesystem::exists(path));
122-
auto executor = odml::infra::LlmLiteRtNpuCompiledModelExecutor::Create(
122+
auto executor = LlmLiteRtNpuCompiledModelExecutor::Create(
123123
engine_settings_.GetMainExecutorSettings(), *litert_model_resources_,
124124
path.parent_path().string());
125125
ABSL_CHECK_OK(executor);

runtime/engine/litert_lm_npu_main.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ ABSL_FLAG(std::string, litert_dispatch_lib_path, "",
4242
ABSL_FLAG(std::string, prompt, "", "Prompt to run.");
4343
ABSL_FLAG(int, num_runs, 1, "Number of times to run the benchmark.");
4444

45-
using odml::infra::LlmLiteRtNpuCompiledModelExecutor;
45+
using litert::lm::LlmLiteRtNpuCompiledModelExecutor;
4646

4747
using litert::lm::InputText;
4848
using litert::lm::ThreadOptions;
@@ -209,7 +209,7 @@ RunStats CreateAndRun(const std::string& prompt) {
209209
auto executor_settings = litert::lm::LlmExecutorSettings::CreateDefault(
210210
model_assets, litert::lm::Backend::NPU)
211211
.value();
212-
auto executor = odml::infra::LlmLiteRtNpuCompiledModelExecutor::Create(
212+
auto executor = LlmLiteRtNpuCompiledModelExecutor::Create(
213213
executor_settings, **model_resources,
214214
absl::GetFlag(FLAGS_litert_dispatch_lib_path));
215215
auto end = absl::Now();

runtime/executor/llm_litert_npu_compiled_model_executor.cc

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
#include "runtime/executor/llm_litert_npu_compiled_model_executor.h"
22

3-
#include <cmath>
43
#include <cstdint>
54
#include <cstring>
65
#include <limits>
@@ -22,35 +21,26 @@
2221
#include "absl/time/time.h" // from @com_google_absl
2322
#include "absl/types/span.h" // from @com_google_absl
2423
#include "litert/c/litert_common.h" // from @litert
25-
#include "litert/c/litert_model.h" // from @litert
2624
#include "litert/cc/litert_compiled_model.h" // from @litert
2725
#include "litert/cc/litert_environment.h" // from @litert
2826
#include "litert/cc/litert_layout.h" // from @litert
2927
#include "litert/cc/litert_macros.h" // from @litert
3028
#include "litert/cc/litert_model.h" // from @litert
3129
#include "litert/cc/litert_tensor_buffer.h" // from @litert
3230
#include "runtime/components/model_resources.h"
33-
#include "runtime/executor/executor_settings_base.h"
3431
#include "runtime/executor/litert_compiled_model_executor_utils.h"
3532
#include "runtime/executor/llm_executor_io_types.h"
3633
#include "runtime/executor/llm_executor_settings.h"
3734
#include "runtime/util/convert_tensor_buffer.h"
3835
#include "runtime/util/litert_status_util.h"
3936
#include "runtime/util/status_macros.h" // NOLINT
4037

41-
namespace odml::infra {
38+
namespace litert::lm {
4239

4340
namespace {
4441
using ::litert::CompiledModel;
4542
using ::litert::Environment;
46-
using ::litert::Model;
4743
using ::litert::TensorBuffer;
48-
using ::litert::lm::CopyFromTensorBuffer;
49-
using ::litert::lm::ExecutorInputs;
50-
using ::litert::lm::ExecutorPrefillParams;
51-
using ::litert::lm::GetOptimizedPrefillWorkGroups;
52-
using ::litert::lm::ReferTensorBufferAsSpan;
53-
using ::litert::lm::SortedPrefillSignatureMap;
5444

5545
constexpr char kPrefillSignature[] = "prefill_128";
5646
constexpr int kPrefillSize = 128;
@@ -538,8 +528,8 @@ absl::Status LlmLiteRtNpuCompiledModelExecutor::Decode(
538528
LITERT_ASSIGN_OR_RETURN(auto logits_buffer_int16,
539529
CopyFromTensorBuffer<int16_t>(decoded_logits));
540530
int max_index = 0;
541-
int16_t max_value = logits_buffer_int16[0];
542-
for (int i = 1; i < logits_buffer_int16.size(); ++i) {
531+
int16_t max_value = std::numeric_limits<int16_t>::min();
532+
for (int i = 0; i < logits_buffer_int16.size(); ++i) {
543533
if (logits_buffer_int16[i] > max_value) {
544534
max_value = logits_buffer_int16[i];
545535
max_index = i;
@@ -1015,4 +1005,4 @@ LlmLiteRtNpuCompiledModelExecutor::Create(
10151005
return executor;
10161006
};
10171007

1018-
} // namespace odml::infra
1008+
} // namespace litert::lm

runtime/executor/llm_litert_npu_compiled_model_executor.h

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
#include "runtime/executor/llm_executor_io_types.h"
3939
#include "runtime/executor/llm_executor_settings.h"
4040

41-
namespace odml::infra {
41+
namespace litert::lm {
4242

4343
// Component intended to be used with an NPU variant of Gemma3.
4444
class LlmLiteRtNpuCompiledModelExecutor : public ::litert::lm::LlmExecutor {
@@ -122,7 +122,7 @@ class LlmLiteRtNpuCompiledModelExecutor : public ::litert::lm::LlmExecutor {
122122
absl::Status Reset() override;
123123

124124
private:
125-
// Holds the tensor buffers maps for the inference of a precompiled model,
125+
// Holds the tensor buffer maps for the inference of a precompiled model,
126126
// both for prefill and decode.
127127
struct InferenceContext {
128128
absl::flat_hash_map<absl::string_view, ::litert::TensorBuffer>
@@ -165,7 +165,8 @@ class LlmLiteRtNpuCompiledModelExecutor : public ::litert::lm::LlmExecutor {
165165
// signatures for Mask, RoPE and KV cache update computation.
166166
struct NpuAuxiliaryContext {
167167
::litert::CompiledModel npu_auxiliary_compiled_model;
168-
NpuAuxiliaryContext(::litert::CompiledModel npu_auxiliary_compiled_model);
168+
explicit NpuAuxiliaryContext(
169+
::litert::CompiledModel npu_auxiliary_compiled_model);
169170
};
170171

171172
protected:
@@ -188,9 +189,7 @@ class LlmLiteRtNpuCompiledModelExecutor : public ::litert::lm::LlmExecutor {
188189
llm_inference_context_(std::move(llm_inference_context)),
189190
cache_update_inference_context_(
190191
std::move(cache_update_inference_context)),
191-
prefill_signature_map_(std::move(prefill_signature_map)) {
192-
executor_settings_.SetMaxNumTokens(1280);
193-
}
192+
prefill_signature_map_(std::move(prefill_signature_map)) {}
194193

195194
private:
196195
// Prefill internal implementation, for one prefill call to the Interpreter
@@ -309,6 +308,6 @@ class LlmLiteRtNpuCompiledModelExecutor : public ::litert::lm::LlmExecutor {
309308
int next_input_token_id_ = -1;
310309
};
311310

312-
} // namespace odml::infra
311+
} // namespace litert::lm
313312

314313
#endif // THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_LITERT_NPU_COMPILED_MODEL_EXECUTOR_H_

0 commit comments

Comments
 (0)