1 file changed: +5 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1261,11 +1261,14 @@ def is_matryoshka(self) -> bool:
1261
1261
class CacheConfig :
1262
1262
"""Configuration for the KV cache."""
1263
1263
1264
- block_size : Optional [ BlockSize ] = None
1264
+ block_size : BlockSize = None # type: ignore
1265
1265
"""Size of a contiguous cache block in number of tokens. This is ignored on
1266
1266
neuron devices and set to `--max-model-len`. On CUDA devices, only block
1267
1267
sizes up to 32 are supported. On HPU devices, block size defaults to 128.
1268
- """
1268
+
1269
+ This config has no static default. If left unspecified by the user, it will
1270
+ be set in `Platform.check_and_update_configs()` based on the current
1271
+ platform."""
1269
1272
gpu_memory_utilization : float = 0.9
1270
1273
"""The fraction of GPU memory to be used for the model executor, which can
1271
1274
range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
You can’t perform that action at this time.
0 commit comments