Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion verl/workers/rollout/vllm_rollout/vllm_async_server.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -216,6 +216,12 @@ async def launch_server(self, master_address: str = None, master_port: int = Non
)
logger.info(f"override_generation_config: {override_generation_config}")

+ enable_sleep_mode = True
+ if self.rollout_mode == RolloutMode.STANDALONE:
+ enable_sleep_mode = False
+ logger.info(f"enable_sleep_mode: {enable_sleep_mode}, set_expandable_segments is True")
+ set_expandable_segments(True)
+
args = {
"dtype": self.config.dtype,
"load_format": self.config.load_format,
Expand All @@ -226,7 +232,7 @@ async def launch_server(self, master_address: str = None, master_port: int = Non
"enable_chunked_prefill": self.config.enable_chunked_prefill,
"max_num_batched_tokens": self.config.max_num_batched_tokens,
"enable_prefix_caching": self.config.enable_prefix_caching,
- "enable_sleep_mode": True,
+ "enable_sleep_mode": enable_sleep_mode,
"disable_custom_all_reduce": True,
"enforce_eager": self.config.enforce_eager,
"gpu_memory_utilization": self.config.gpu_memory_utilization,
Expand Down