We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b9427c3, commit 606bba8. Copy full SHA for 606bba8
1 file changed
verl/workers/engine/veomni/transformer_impl.py
@@ -83,10 +83,15 @@ def __init__(
83
world_size = dist.get_world_size()
84
dp_size = world_size // self.engine_config.ulysses_parallel_size
85
86
- if fsdp_size < 0 or fsdp_size >= world_size:
+ if fsdp_size < 0 or fsdp_size >= dp_size:
87
data_parallel_replicate_size = 1
88
- data_parallel_shard_size = world_size
+ data_parallel_shard_size = dp_size
89
else:
90
+ if dp_size % fsdp_size != 0:
91
+ raise ValueError(
92
+ f"Data parallel size ({dp_size}) must be divisible by fsdp_size ({fsdp_size}). "
93
+ "Please adjust your parallel configuration."
94
+ )
95
data_parallel_replicate_size = dp_size // fsdp_size
96
data_parallel_shard_size = fsdp_size
97
0 commit comments