
Commit 98c3207

add physical_blocks_per_logical in meta
Signed-off-by: ZhanqiuHu <zhu@redhat.com>
1 parent: 8651af6

3 files changed: 11 additions & 8 deletions


tests/v1/kv_connector/unit/test_nixl_connector.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -527,6 +527,7 @@ def _nixl_handshake(
         block_size=self.block_size,
         ssm_sizes=(0, 0),
         attn_backend_name=self.backend_name,
+        physical_blocks_per_logical_kv_block=1,
     ),
     remote_tp_rank=remote_tp_rank,
     remote_tp_size=remote_tp_size,
@@ -979,6 +980,7 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(
     block_size=worker.block_size,
     ssm_sizes=(0, 0),
     attn_backend_name=worker.backend_name,
+    physical_blocks_per_logical_kv_block=1,
 )

 with pytest.raises(RuntimeError):
@@ -1036,6 +1038,7 @@ def test_handshake_succeed_on_kv_cache_layout_mismatch_with_experimental(
     block_size=worker.block_size,
     ssm_sizes=(0, 0),
     attn_backend_name=worker.backend_name,
+    physical_blocks_per_logical_kv_block=1,
 )

 # We don't check layout for homogeneous TP and MLA for now, as the
@@ -2355,6 +2358,7 @@ def test_compatibility_hash_validation(
     block_size=prefill_block_size,
     ssm_sizes=(0, 0),
     attn_backend_name=decode_worker.backend_name,
+    physical_blocks_per_logical_kv_block=1,
 )
 handshake_payload = NixlHandshakePayload(
     compatibility_hash=remote_hash,
```
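Worth noting why all four call sites change in lockstep: the new `physical_blocks_per_logical_kv_block: int` field (added to `NixlAgentMetadata` in the next file) is declared without a default, so every existing constructor call fails until it passes the field explicitly. A toy reproduction of that effect, not vLLM code, using a deliberately reduced field set:

```python
from dataclasses import dataclass


@dataclass
class AgentMetaV2:  # hypothetical stand-in for the old NixlAgentMetadata
    block_size: int
    attn_backend_name: str


@dataclass
class AgentMetaV3:  # hypothetical stand-in for the new one
    block_size: int
    attn_backend_name: str
    physical_blocks_per_logical_kv_block: int  # new and required: no default


AgentMetaV2(block_size=16, attn_backend_name="FLASH_ATTN")  # still fine

try:
    AgentMetaV3(block_size=16, attn_backend_name="FLASH_ATTN")
except TypeError as exc:
    # missing 1 required positional argument:
    # 'physical_blocks_per_logical_kv_block'
    print(exc)

AgentMetaV3(
    block_size=16,
    attn_backend_name="FLASH_ATTN",
    physical_blocks_per_logical_kv_block=1,  # what the tests above now pass
)
```

Leaving the field without a default is presumably deliberate: each sender must state its ratio explicitly rather than silently inheriting a 1:1 assumption.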

vllm/distributed/kv_transfer/kv_connector/v1/nixl/metadata.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -32,8 +32,9 @@
 # Version History:
 # 1: Initial version with compatibility checking
 # 2: Add remote_request_id to kv_transfer_params
+# 3: Add physical_blocks_per_logical_kv_block to NixlAgentMetadata
 #
-NIXL_CONNECTOR_VERSION: int = 2
+NIXL_CONNECTOR_VERSION: int = 3


 @dataclass
```
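The version bump is the compatibility story here: per the "Wrap metadata in payload with hash for defensive decoding" comment and `test_compatibility_hash_validation` elsewhere in this commit, the handshake carries a compatibility hash, and `NIXL_CONNECTOR_VERSION` presumably feeds into it so that peers on different metadata schemas refuse to talk rather than mis-decode each other. A minimal sketch of that pattern, with an illustrative SHA-256 derivation that is not vLLM's actual one:

```python
import hashlib

NIXL_CONNECTOR_VERSION: int = 3


def compat_hash(version: int, backend: str, block_size: int) -> str:
    # Fold everything that must match between peers into a single digest.
    payload = f"{version}:{backend}:{block_size}".encode()
    return hashlib.sha256(payload).hexdigest()


local = compat_hash(NIXL_CONNECTOR_VERSION, "FLASH_ATTN", 16)
remote = compat_hash(2, "FLASH_ATTN", 16)  # peer still on schema version 2

if local != remote:
    # Fail fast instead of mis-decoding the peer's metadata.
    raise RuntimeError("NIXL handshake rejected: incompatible connector metadata")
```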
```diff
@@ -48,6 +49,7 @@ class NixlAgentMetadata:
     block_size: int
     ssm_sizes: tuple[int, int]
     attn_backend_name: str
+    physical_blocks_per_logical_kv_block: int


 @dataclass
```
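For context on what the field encodes: the helper removed in worker.py below, `compute_physical_blocks_per_logical`, returned 1 unless the model has Mamba/SSM layers, so the ratio describes how many physical cache blocks back one logical KV block on the hybrid-model path. A hedged sketch of how such a ratio could translate logical block ids into physical ones, assuming a contiguous layout (illustrative only; the connector's actual transfer mapping is not shown in this diff):

```python
def physical_block_ids(logical_block_id: int, ratio: int) -> list[int]:
    # Assumed layout: logical block i is backed by physical blocks
    # [i * ratio, (i + 1) * ratio). Hypothetical, for illustration.
    start = logical_block_id * ratio
    return list(range(start, start + ratio))


assert physical_block_ids(0, 1) == [0]        # attention-only model: 1:1
assert physical_block_ids(2, 3) == [6, 7, 8]  # hybrid: 3 physical per logical
```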

vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py

Lines changed: 4 additions & 7 deletions
```diff
@@ -57,7 +57,6 @@
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.ssm_conv_transfer_utils import (
     MambaConvSplitInfo,
-    compute_physical_blocks_per_logical,
     derive_mamba_conv_split,
 )
 from vllm.distributed.nixl_utils import NixlWrapper, nixl_agent_config
@@ -948,6 +947,9 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
     block_size=self.block_size,
     ssm_sizes=self._mamba_ssm_size,
     attn_backend_name=self.backend_name,
+    physical_blocks_per_logical_kv_block=(
+        self._physical_blocks_per_logical_kv_block
+    ),
 )
 # Wrap metadata in payload with hash for defensive decoding
 assert self.compat_hash is not None
@@ -1245,12 +1247,7 @@ def add_remote_agent(
     assert self.transfer_topo is not None
     transfer_topo = self.transfer_topo
     physical_blocks_per_logical = (
-        compute_physical_blocks_per_logical(
-            nixl_agent_meta.ssm_sizes,
-            nixl_agent_meta.block_lens[0],
-        )
-        if self._has_mamba
-        else 1
+        nixl_agent_meta.physical_blocks_per_logical_kv_block
     )
     transfer_info = EngineTransferInfo(
         remote_tp_size=remote_tp_size,
```
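The receiver-side change is the substance of the commit: `add_remote_agent` used to re-derive the remote's ratio from `ssm_sizes` and `block_lens[0]`, gated on the local `self._has_mamba` flag, i.e. a local property standing in for the remote's layout. Now each worker computes the ratio for its own caches in `register_kv_caches` and advertises it in the handshake, so the receiver just reads it. A simplified before/after sketch with toy types; the derivation formula below is a placeholder for the real `compute_physical_blocks_per_logical`:

```python
from dataclasses import dataclass


@dataclass
class RemoteMeta:  # toy stand-in for NixlAgentMetadata
    ssm_sizes: tuple[int, int]
    block_lens: list[int]
    physical_blocks_per_logical_kv_block: int


def ratio_before(meta: RemoteMeta, local_has_mamba: bool) -> int:
    # Old approach: recompute on the receiver, gated by the *local*
    # worker's Mamba flag. If that flag disagrees with the remote's
    # actual layout, the derived ratio is wrong.
    if not local_has_mamba:
        return 1
    ssm_total = sum(meta.ssm_sizes)
    return max(1, -(-ssm_total // meta.block_lens[0]))  # ceil-div placeholder


def ratio_after(meta: RemoteMeta) -> int:
    # New approach: trust the ratio the remote computed for itself.
    return meta.physical_blocks_per_logical_kv_block


meta = RemoteMeta(ssm_sizes=(0, 0), block_lens=[4096],
                  physical_blocks_per_logical_kv_block=1)
assert ratio_after(meta) == ratio_before(meta, local_has_mamba=False) == 1
```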
