@@ -5,13 +5,13 @@ stage_args:
55 - stage_id : 0
66 runtime :
77 process : true # Run this stage in a separate process
8- devices : " 0 " # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device)
8+ devices : " 5 " # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device)
99 max_batch_size : 1
1010 engine_args :
1111 model_stage : thinker
1212 model_arch : Qwen2_5OmniForConditionalGeneration
1313 worker_cls : vllm_omni.worker.gpu_ar_worker.GPUARWorker
14- scheduler_cls : vllm_omni.core.sched.scheduler.GPUARScheduler
14+ scheduler_cls : vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler
1515 gpu_memory_utilization : 0.8
1616 enforce_eager : true # Now we only support eager mode
1717 trust_remote_code : true
@@ -34,13 +34,13 @@ stage_args:
3434 - stage_id : 1
3535 runtime :
3636 process : true
37- devices : " 1 "
37+ devices : " 6 "
3838 max_batch_size : 1
3939 engine_args :
4040 model_stage : talker
4141 model_arch : Qwen2_5OmniForConditionalGeneration
4242 worker_cls : vllm_omni.worker.gpu_ar_worker.GPUARWorker
43- scheduler_cls : vllm_omni.core.sched.scheduler.GPUARScheduler
43+ scheduler_cls : vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler
4444 gpu_memory_utilization : 0.8
4545 enforce_eager : true
4646 trust_remote_code : true
@@ -65,17 +65,18 @@ stage_args:
6565 - stage_id : 2
6666 runtime :
6767 process : true
68- devices : " 2 " # Example: use a different GPU than the previous stage; use "0" if single GPU
68+ devices : " 7 " # Example: use a different GPU than the previous stage; use "0" if single GPU
6969 max_batch_size : 1
7070 engine_args :
7171 model_stage : code2wav
7272 model_arch : Qwen2_5OmniForConditionalGeneration
73- worker_cls : vllm_omni.worker.gpu_diffusion_worker.GPUGenerationWorker
74- scheduler_cls : vllm_omni.core.sched.diffusion_scheduler.GPUGenerationScheduler
73+ worker_cls : vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker
74+ scheduler_cls : vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler
7575 gpu_memory_utilization : 0.3
7676 enforce_eager : true
7777 trust_remote_code : true
7878 enable_prefix_caching : false
79+ max_num_batched_tokens : 32768
7980 engine_output_type : audio
8081 engine_input_source : [1]
8182 final_output : true
0 commit comments