Skip to content

Commit 56d895f

Browse files
ispobock authored and xwu-intel committed
Fix set kv cache multi-stream (sgl-project#5975)
1 parent 0a08529 commit 56d895f

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

python/sglang/srt/mem_cache/memory_pool.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -374,9 +374,9 @@ def set_kv_buffer(
374374
# Overlap the copy of K and V cache for small batch size
375375
current_stream = self.device_module.current_stream()
376376
self.alt_stream.wait_stream(current_stream)
377+
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
377378
with self.device_module.stream(self.alt_stream):
378-
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
379-
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
379+
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
380380
current_stream.wait_stream(self.alt_stream)
381381
else:
382382
self.k_buffer[layer_id - self.start_layer][loc] = cache_k

0 commit comments

Comments (0)