-
Notifications
You must be signed in to change notification settings - Fork 737
[Executor]CUDAGraph support Speculate Decode #3769
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 39 commits
8351e83
02e8384
1444ba6
892c0c2
18d9823
64ea2f7
3263006
5b75ade
4772a4f
4a0a6df
ec4a2df
529214c
2dd98da
1d3ef67
349988f
15d3103
7f11653
bb9c911
d1115a7
77e64ed
235b0ba
3516be4
c6cdc17
167fb58
4c10571
89c6c83
fdf49de
834639a
4c09b0b
9f71c0e
fc6ce99
8c306d8
cf01a97
e28327a
a44e2d9
00de438
678152f
d841cc6
3bf990c
2eaf778
24fa8cb
1a4190b
d3e7df9
96d85a0
1c23a3e
f814026
beaaaec
ce11adb
4c06088
c885ba6
d23206c
4a8f947
2137520
a4323aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2418,6 +2418,9 @@ __global__ void merge_multi_chunks_v2_kernel( | |
| __shared__ float md_smem[bdy * 2]; | ||
| for (int qid = blockIdx.x; qid < token_num; qid += gridDim.x) { | ||
| const uint32_t bid = batch_id_per_token[qid]; | ||
| if(bid == -1){ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 注意下编码规范 (Please follow the coding conventions.)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
这里能把 bid 从 uint32_t 切换成 int 吗?取值范围变小了有无风险? (Can bid be changed from uint32_t to int here? Is there any risk from the smaller value range?) |
||
| continue; | ||
| } | ||
| const uint32_t local_seq_id = qid - cu_seqlens_q[bid]; | ||
| const int seq_len_q = seq_lens_q[bid]; | ||
| if (seq_len_q == 0) continue; | ||
|
|
@@ -2437,6 +2440,8 @@ __global__ void merge_multi_chunks_v2_kernel( | |
| const int num_chunks_this_seq = div_up(seq_len_kv, chunk_size); | ||
| if (num_chunks_this_seq <= 1) { | ||
| continue; | ||
| }else if (!ENABLE_PREFILL){ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 同上 (Same as above.) |
||
| continue; | ||
| } | ||
|
|
||
| using LoadT = AlignedVector<T, vec_size>; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.