@@ -39,19 +39,25 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
39
39
Runner::Runner (
40
40
const std::string& model_path,
41
41
const std::string& tokenizer_path,
42
- const float temperature)
42
+ const float temperature,
43
+ std::optional<const std::string> data_path)
43
44
// NOTE: we observed ~2x loading performance increase on iPhone 15
44
45
// and a ~5% improvement on Galaxy S22 by switching to
45
46
// FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
46
47
: temperature_(temperature),
47
- module_ (std::make_unique<Module>(model_path, Module::LoadMode::File)),
48
48
tokenizer_path_ (tokenizer_path),
49
49
metadata_({
50
50
{kEnableDynamicShape , false },
51
51
{kMaxSeqLen , 128 },
52
52
{kUseKVCache , true },
53
53
{kUseSDPAWithKVCache , false },
54
54
}) {
55
+ if (data_path.has_value ()) {
56
+ module_ = std::make_unique<Module>(
57
+ model_path, data_path.value (), Module::LoadMode::File);
58
+ } else {
59
+ module_ = std::make_unique<Module>(model_path, Module::LoadMode::File);
60
+ }
55
61
ET_LOG (
56
62
Info,
57
63
" Creating LLaMa runner: model_path=%s, tokenizer_path=%s" ,
0 commit comments