Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/sglang/srt/speculative/ngram_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def __init__(
self.draft_token_num = draft_token_num
self.device = self.custom_mask.device
self.grammar = grammar
# Ngram speculative decoding doesn't use tree attention (topk=1),
# but some attention backends read spec_info.topk unconditionally.
self.topk = 1

def get_spec_adjust_token_coefficient(self) -> Tuple[int, int]:
    """Return the pair of token-adjustment coefficients for this spec info.

    Both entries are the same value, ``self.draft_token_num``.

    Returns:
        A 2-tuple ``(draft_token_num, draft_token_num)``.
    """
    # NOTE(review): what each tuple position means is defined by the
    # callers, which are not visible here -- confirm before relying on it.
    coefficient = self.draft_token_num
    return (coefficient, coefficient)
Expand Down
Loading