@@ -243,10 +243,12 @@ def determine_available_memory(self) -> int:
243
243
if (self .cache_config .enable_pp_prop_kv_cache
244
244
and pp_group .world_size > 1
245
245
and envs .VLLM_PP_LAYER_PARTITION ):
246
- num_hidden_layers = self .model_config .hf_text_config .num_hidden_layers
246
+ hf_config = self .model_config .hf_text_config
247
+ num_hidden_layers = hf_config .num_hidden_layers
247
248
248
249
start_layer , end_layer = get_pp_indices (
249
- num_hidden_layers , pp_group .rank_in_group , pp_group .world_size )
250
+ num_hidden_layers , pp_group .rank_in_group ,
251
+ pp_group .world_size )
250
252
local_layers = end_layer - start_layer
251
253
prop_kv_cache_bytes = int (kv_cache_memory_bytes *
252
254
local_layers / num_hidden_layers )
@@ -263,16 +265,17 @@ def determine_available_memory(self) -> int:
263
265
return prop_kv_cache_bytes
264
266
265
267
msg = (
266
- f"Initial free memory { GiB (self .init_snapshot .free_memory ):.2f} "
267
- f"GiB, reserved { GiB (kv_cache_memory_bytes ):.2f} GiB memory for "
268
- "KV Cache as specified by kv_cache_memory_bytes config and "
269
- "skipped memory profiling. This does not respect the "
270
- "gpu_memory_utilization config. Only use kv_cache_memory_bytes "
271
- "config when you want manual control of KV cache memory "
272
- "size. If OOM'ed, check the difference of initial free "
273
- "memory between the current run and the previous run "
274
- "where kv_cache_memory_bytes is suggested and update it "
275
- "correspondingly." )
268
+ f"Initial free memory "
269
+ f"{ GiB (self .init_snapshot .free_memory ):.2f} "
270
+ f"GiB, reserved { GiB (kv_cache_memory_bytes ):.2f} GiB memory "
271
+ f"for KV Cache as specified by kv_cache_memory_bytes config "
272
+ f"and skipped memory profiling. This does not respect the "
273
+ f"gpu_memory_utilization config. Only use "
274
+ f"kv_cache_memory_bytes config when you want manual control "
275
+ f"of KV cache memory size. If OOM'ed, check the difference "
276
+ f"of initial free memory between the current run and the "
277
+ f"previous run where kv_cache_memory_bytes is suggested and "
278
+ f"update it correspondingly." )
276
279
logger .info (msg )
277
280
return kv_cache_memory_bytes
278
281
0 commit comments