3 files changed: +16 -8 lines changed
Original file line number | Diff line number | Diff line change
@@ -429,9 +429,9 @@ def event_loop_normal_disagg_decode(self):
429
429
# polling and allocating kv cache
430
430
self .process_decode_queue ()
431
431
batch = self .get_next_disagg_decode_batch_to_run ()
432
-
432
+
433
433
is_real_batch = True
434
-
434
+
435
435
if batch and batch .forward_mode .is_extend ():
436
436
self .cur_batch = batch
437
437
# Generate fake extend output.
@@ -442,9 +442,12 @@ def event_loop_normal_disagg_decode(self):
442
442
is_real_batch = False
443
443
444
444
# Handle DP attention
445
- if self .server_args .enable_dp_attention or self .server_args .enable_sp_layernorm :
445
+ if (
446
+ self .server_args .enable_dp_attention
447
+ or self .server_args .enable_sp_layernorm
448
+ ):
446
449
batch , _ = self .prepare_dp_attn_batch (batch )
447
-
450
+
448
451
if is_real_batch :
449
452
self .cur_batch = batch
450
453
Original file line number | Diff line number | Diff line change
@@ -183,9 +183,12 @@ def event_loop_normal_disagg_prefill(self):
183
183
)
184
184
self .process_prefill_chunk ()
185
185
batch = self .get_new_batch_prefill ()
186
-
186
+
187
187
# Handle DP attention
188
- if self .server_args .enable_dp_attention or self .server_args .enable_sp_layernorm :
188
+ if (
189
+ self .server_args .enable_dp_attention
190
+ or self .server_args .enable_sp_layernorm
191
+ ):
189
192
batch , _ = self .prepare_dp_attn_batch (batch )
190
193
191
194
self .cur_batch = batch
Original file line number | Diff line number | Diff line change
24
24
import zmq
25
25
26
26
from sglang .srt .disaggregation .utils import DisaggregationMode
27
- from sglang .srt .managers .schedule_batch import Req
28
27
from sglang .srt .layers .dp_attention import compute_dp_attention_world_info
29
28
from sglang .srt .managers .io_struct import (
30
29
TokenizedEmbeddingReqInput ,
31
30
TokenizedGenerateReqInput ,
32
31
)
32
+ from sglang .srt .managers .schedule_batch import Req
33
33
from sglang .srt .managers .scheduler import run_scheduler_process
34
34
from sglang .srt .server_args import PortArgs , ServerArgs
35
35
from sglang .srt .utils import bind_port , configure_logger , get_zmq_socket
@@ -225,7 +225,9 @@ def launch_tensor_parallel_group(
225
225
def round_robin_scheduler (self , req : Req ):
226
226
if self .server_args .disaggregation_mode == DisaggregationMode .NULL :
227
227
self .workers [self .round_robin_counter ].send_pyobj (req )
228
- self .round_robin_counter = (self .round_robin_counter + 1 ) % len (self .workers )
228
+ self .round_robin_counter = (self .round_robin_counter + 1 ) % len (
229
+ self .workers
230
+ )
229
231
else :
230
232
self .workers [req .bootstrap_room % len (self .workers )].send_pyobj (req )
231
233
You can’t perform that action at this time.
0 commit comments