File tree: 1 file changed (+3 −2 lines changed)
examples/qualcomm/oss_scripts/llama3_2/runner: 1 file changed (+3 −2 lines changed)
Columns: Original file line number | Diff line number | Diff line change
@@ -64,8 +64,8 @@ HybridMemory::HybridMemory(
64
64
const std::string& kv_forward_name)
65
65
: Memory(modules),
66
66
shard_layers_ ({num_layers}),
67
- prefill_cache_len_(prefill_cache_len),
68
67
kv_cache_len_(kv_cache_len),
68
+ prefill_cache_len_(prefill_cache_len),
69
69
vocab_size_(vocab_size),
70
70
num_layers_(num_layers),
71
71
head_dim_(head_dim),
@@ -332,7 +332,8 @@ void HybridMemory::prepare_prefill_io(
332
332
input_tensors_[prefill_forward_name_][0 ].push_back (prefill_attn_mask_.get ());
333
333
// [O]: logits
334
334
int logit_index = 0 ;
335
- Result<TensorInfo> logits = methods_meta[0 ]->output_tensor_meta (0 );
335
+ Result<TensorInfo> logits =
336
+ methods_meta[modules_.size () - 1 ]->output_tensor_meta (logit_index);
336
337
prefill_logits_ = std::make_unique<TensorImpl>(
337
338
logits->scalar_type (),
338
339
logits->sizes ().size (),
You can’t perform that action at this time.
0 commit comments