We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0a08529 commit 56d895f Copy full SHA for 56d895f
python/sglang/srt/mem_cache/memory_pool.py
@@ -374,9 +374,9 @@ def set_kv_buffer(
374
# Overlap the copy of K and V cache for small batch size
375
current_stream = self.device_module.current_stream()
376
self.alt_stream.wait_stream(current_stream)
377
+ self.k_buffer[layer_id - self.start_layer][loc] = cache_k
378
with self.device_module.stream(self.alt_stream):
- self.k_buffer[layer_id - self.start_layer][loc] = cache_k
379
- self.v_buffer[layer_id - self.start_layer][loc] = cache_v
+ self.v_buffer[layer_id - self.start_layer][loc] = cache_v
380
current_stream.wait_stream(self.alt_stream)
381
else:
382
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
0 commit comments