Skip to content

Commit 99ff722

Browse files
committed
Set the default FPS to 24.0, the default of the official LTX 2 checkpoint
1 parent 40ee3e3 commit 99ff722

File tree

3 files changed: 9 additions (+9) and 10 deletions (-10)

src/diffusers/models/transformers/transformer_ltx2.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -667,7 +667,7 @@ def prepare_video_coords(
667667
height: int,
668668
width: int,
669669
device: torch.device,
670-
fps: float = 25.0,
670+
fps: float = 24.0,
671671
) -> torch.Tensor:
672672
"""
673673
Create per-dimension bounds [inclusive start, exclusive end) for each patch with respect to the original pixel
@@ -735,7 +735,6 @@ def prepare_audio_coords(
735735
batch_size: int,
736736
num_frames: int,
737737
device: torch.device,
738-
fps: float = 25.0,
739738
shift: int = 0,
740739
) -> torch.Tensor:
741740
"""
@@ -1115,7 +1114,7 @@ def forward(
11151114
num_frames: Optional[int] = None,
11161115
height: Optional[int] = None,
11171116
width: Optional[int] = None,
1118-
fps: float = 25.0,
1117+
fps: float = 24.0,
11191118
audio_num_frames: Optional[int] = None,
11201119
video_coords: Optional[torch.Tensor] = None,
11211120
audio_coords: Optional[torch.Tensor] = None,
@@ -1176,7 +1175,7 @@ def forward(
11761175
)
11771176
if audio_coords is None:
11781177
audio_coords = self.audio_rope.prepare_audio_coords(
1179-
batch_size, audio_num_frames, audio_hidden_states.device, fps=fps
1178+
batch_size, audio_num_frames, audio_hidden_states.device
11801179
)
11811180

11821181
video_rotary_emb = self.rope(video_coords, device=hidden_states.device)

src/diffusers/pipelines/ltx2/pipeline_ltx2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ def __call__(
746746
height: int = 512,
747747
width: int = 768,
748748
num_frames: int = 121,
749-
frame_rate: float = 25.0,
749+
frame_rate: float = 24.0,
750750
num_inference_steps: int = 40,
751751
timesteps: List[int] = None,
752752
guidance_scale: float = 3.0,
@@ -781,7 +781,7 @@ def __call__(
781781
The width in pixels of the generated image. This is set to 848 by default for the best results.
782782
num_frames (`int`, *optional*, defaults to `121`):
783783
The number of video frames to generate
784-
frame_rate (`float`, *optional*, defaults to `25.0`):
784+
frame_rate (`float`, *optional*, defaults to `24.0`):
785785
The frames per second (FPS) of the generated video.
786786
num_inference_steps (`int`, *optional*, defaults to 40):
787787
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
@@ -996,7 +996,7 @@ def __call__(
996996
latents.shape[0], latent_num_frames, latent_height, latent_width, latents.device, fps=frame_rate
997997
)
998998
audio_coords = self.transformer.audio_rope.prepare_audio_coords(
999-
audio_latents.shape[0], audio_num_frames, audio_latents.device, fps=frame_rate
999+
audio_latents.shape[0], audio_num_frames, audio_latents.device
10001000
)
10011001

10021002
# 7. Denoising loop

src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ def __call__(
807807
height: int = 512,
808808
width: int = 768,
809809
num_frames: int = 121,
810-
frame_rate: float = 25.0,
810+
frame_rate: float = 24.0,
811811
num_inference_steps: int = 40,
812812
timesteps: List[int] = None,
813813
guidance_scale: float = 3.0,
@@ -844,7 +844,7 @@ def __call__(
844844
The width in pixels of the generated image. This is set to 848 by default for the best results.
845845
num_frames (`int`, *optional*, defaults to `121`):
846846
The number of video frames to generate
847-
frame_rate (`float`, *optional*, defaults to `25.0`):
847+
frame_rate (`float`, *optional*, defaults to `24.0`):
848848
The frames per second (FPS) of the generated video.
849849
num_inference_steps (`int`, *optional*, defaults to 40):
850850
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
@@ -1067,7 +1067,7 @@ def __call__(
10671067
latents.shape[0], latent_num_frames, latent_height, latent_width, latents.device, fps=frame_rate
10681068
)
10691069
audio_coords = self.transformer.audio_rope.prepare_audio_coords(
1070-
audio_latents.shape[0], audio_num_frames, audio_latents.device, fps=frame_rate
1070+
audio_latents.shape[0], audio_num_frames, audio_latents.device
10711071
)
10721072

10731073
# 7. Denoising loop

0 commit comments

Comments
 (0)