Sequential CPU offload: require accelerate 0.14.0 #2517

Merged · 3 commits · Feb 28, 2023

@@ -191,10 +191,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

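For context, this method is called on a loaded pipeline before running inference. A minimal usage sketch (the checkpoint id and prompt are illustrative, not taken from this PR):

import torch
from diffusers import StableDiffusionPipeline

# Illustrative checkpoint; any Stable Diffusion checkpoint behaves the same.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# With accelerate >= 0.14.0 installed, each submodule is moved to cuda:0 only
# while it runs and is returned to CPU afterwards; with an older accelerate,
# this call now raises the ImportError introduced in this PR.
pipe.enable_sequential_cpu_offload(gpu_id=0)

image = pipe("a photo of an astronaut riding a horse").images[0]
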
@@ -213,10 +213,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -230,10 +230,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -194,10 +194,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -24,7 +24,7 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.cross_attention import CrossAttention
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -256,10 +256,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

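The newly imported is_accelerate_version helper comes from diffusers.utils (re-exported from diffusers.utils.import_utils, as the test diffs below show). A minimal sketch of how such a version gate can be implemented with the packaging library; the actual helper may differ in details:

import importlib.metadata
import operator

from packaging import version

# Map the operator strings used at the call sites (">=", "<", ...) to functions.
_COMPARE = {
    "<": operator.lt,
    "<=": operator.le,
    "==": operator.eq,
    ">=": operator.ge,
    ">": operator.gt,
}

def is_accelerate_version(op: str, required: str) -> bool:
    # When accelerate is not installed at all, every version check fails.
    try:
        installed = importlib.metadata.version("accelerate")
    except importlib.metadata.PackageNotFoundError:
        return False
    return _COMPARE[op](version.parse(installed), version.parse(required))
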
@@ -218,10 +218,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -265,10 +265,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -209,10 +209,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -398,10 +398,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

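Each hunk cuts off just after the device line; the method body then wraps the large submodules with accelerate's cpu_offload. A sketch of that continuation, based on the pipeline code of the time rather than on this diff:

device = torch.device(f"cuda:{gpu_id}")

# cpu_offload hooks each module so it is copied to `device` only for the
# duration of its forward pass and moved back to CPU afterwards, trading
# speed for a much smaller peak GPU memory footprint.
for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
    if cpu_offloaded_model is not None:
        cpu_offload(cpu_offloaded_model, execution_device=device)
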
@@ -130,10 +130,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -19,7 +19,7 @@
 
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import DDIMScheduler, PNDMScheduler
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -151,10 +151,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -365,10 +365,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

@@ -22,7 +22,7 @@
 
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -169,10 +169,10 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
         device = torch.device(f"cuda:{gpu_id}")

tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py (12 additions, 3 deletions)

@@ -39,7 +39,16 @@
     StableDiffusionDepth2ImgPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils import floats_tensor, is_accelerate_available, load_image, load_numpy, nightly, slow, torch_device
+from diffusers.utils import (
+    floats_tensor,
+    is_accelerate_available,
+    is_accelerate_version,
+    load_image,
+    load_numpy,
+    nightly,
+    slow,
+    torch_device,
+)
 from diffusers.utils.testing_utils import require_torch_gpu, skip_mps
 
 from ...test_pipelines_common import PipelineTesterMixin
@@ -227,8 +236,8 @@ def test_float16_inference(self):
         self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")
 
     @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available(),
-        reason="CPU offload is only available with CUDA and `accelerate` installed",
+        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
+        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
     )
     def test_cpu_offload_forward_pass(self):
         components = self.get_dummy_components()

tests/test_pipelines_common.py (3 additions, 3 deletions)

@@ -20,7 +20,7 @@
     StableDiffusionImg2ImgPipeline,
 )
 from diffusers.utils import logging
-from diffusers.utils.import_utils import is_accelerate_available, is_xformers_available
+from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
 from diffusers.utils.testing_utils import require_torch, torch_device
 
 
@@ -473,8 +473,8 @@ def _test_attention_slicing_forward_pass(self, test_max_difference=True):
         assert_mean_pixel_difference(output_with_slicing[0], output_without_slicing[0])
 
     @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available(),
-        reason="CPU offload is only available with CUDA and `accelerate` installed",
+        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
+        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
     )
     def test_cpu_offload_forward_pass(self):
         if not self.test_cpu_offload:

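The tests invert the comparison ("<" rather than ">=") because they must be skipped in exactly the cases where the library method would raise. A quick, illustrative way to check the gate locally (minus the CUDA check):

from diffusers.utils import is_accelerate_available, is_accelerate_version

# Skip when accelerate is missing or too old, i.e. whenever
# enable_sequential_cpu_offload would raise an ImportError.
needs_skip = not is_accelerate_available() or is_accelerate_version("<", "0.14.0")
print("test_cpu_offload_forward_pass skipped:", needs_skip)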