rotary embedding refactor 2: update comments, fix dtype for use_real=False #9312

Merged
yiyixuxu merged 2 commits into main from fix-rope-not-real on Aug 29, 2024

Conversation

yiyixuxu
Collaborator

also added a slow test for all pipelines that use rotary embeddings
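
For context on the "fix dtype for use_real=False" part of the title: below is a minimal standalone sketch of a 1D rotary position embedding with a use_real flag. It is not the diffusers implementation or the exact change in this PR, only an illustration of the two output paths the flag selects (a real cos/sin pair vs. a complex tensor) under the assumption that the frequency math is kept in float32.

import torch

def rope_1d(dim: int, positions: torch.Tensor, theta: float = 10000.0, use_real: bool = True):
    # Illustrative only; `rope_1d` is a hypothetical name, not a diffusers API.
    assert dim % 2 == 0
    # Compute the rotation frequencies in float32 regardless of the model's working dtype.
    freqs = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))  # [dim // 2]
    angles = torch.outer(positions.float(), freqs)  # [seq_len, dim // 2]
    if use_real:
        return angles.cos(), angles.sin()  # two float32 tensors
    # use_real=False: a complex64 tensor of unit-magnitude rotations.
    return torch.polar(torch.ones_like(angles), angles)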

slow test for pipelines using rotary embedding
from diffusers import DiffusionPipeline
from benchmarks.utils import benchmark_fn, flush, bytes_to_giga_bytes, BenchmarkInfo
import argparse
import torch
import os

from typing import Dict, Union
import csv

BENCHMARK_FIELDS = [
    "pipeline_cls",
    "repo_id",
    "time (secs)",
    "memory (gbs)",
]

def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
    """Append a dictionary into a CSV file."""
    file_exists = os.path.isfile(file_name)
    with open(file_name, mode="a", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)

        if not file_exists:
            writer.writeheader()
        
        writer.writerow(data_dict)


REPO_ID_MAPPING = {
    "flux-schnell": "black-forest-labs/FLUX.1-schnell",
    "cogvideox-5b": "THUDM/CogVideoX-5b",
    "stable-audio": "stabilityai/stable-audio-open-1.0",
    "hunyuan-dit": "Tencent-Hunyuan/HunyuanDiT-Diffusers",
    "lumina": "Alpha-VLLM/Lumina-Next-SFT-diffusers",
}

DTYPE_MAPPING = {
    "flux-schnell": torch.bfloat16,
    "cogvideox-5b": torch.bfloat16,
    "stable-audio": torch.float16,
    "hunyuan-dit": torch.float16,
    "lumina": torch.bfloat16,
}

OUTPUT_TYPE_MAPPING = {
    "flux-schnell": "image",
    "cogvideox-5b": "video",
    "stable-audio": "audio",
    "hunyuan-dit": "image",
    "lumina": "image",
}

CALL_ARGS_MAPPING = {
    "flux-schnell": {
        "prompt": "A cat holding a sign that says hello world",
        "guidance_scale": 0.0,
        "num_inference_steps":4,
        "max_sequence_length": 256,
        },
    "cogvideox-5b": {
        "prompt": "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance.",
        "num_inference_steps": 50,
        "num_frames": 49,
        "guidance_scale": 6,
    },
    "stable-audio": {
        "prompt": "The sound of a hammer hitting a wooden surface.",
        "negative_prompt":  "Low quality.",
        "num_inference_steps": 200,
        "audio_end_in_s": 10.0,
        "num_waveforms_per_prompt": 3,    
    },
    "hunyuan-dit": {
        "prompt": "一个宇航员在骑马",      
    },
    "lumina": {
        "prompt": "Upper body of a young woman in a Victorian-era outfit with brass goggles and leather straps. Background shows an industrial revolution cityscape with smoky skies and tall, metal structures",
    },
}


def load_pipeline(model_name, args):
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    pipeline = DiffusionPipeline.from_pretrained(REPO_ID_MAPPING[model_name], torch_dtype=DTYPE_MAPPING[model_name])
    if args.offload:
        pipeline.enable_model_cpu_offload()
    else:
        pipeline = pipeline.to("cuda")

    if args.run_compile:
        pipeline.transformer.to(memory_format=torch.channels_last)
        pipeline.vae.to(memory_format=torch.channels_last)
        pipeline.transformer = torch.compile(pipeline.transformer, mode="max-autotune", fullgraph=True)
        pipeline.vae.decode = torch.compile(pipeline.vae.decode, mode="max-autotune", fullgraph=True)


    pipeline.set_progress_bar_config(disable=True)
    return pipeline


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--branch", default="main", type=str)
    parser.add_argument("--run_compile", action="store_true", help="test with torch.compile")
    parser.add_argument("--offload", action="store_true", help="test with enable_model_cpu_offload")
    parser.add_argument("--result_dir", default="slow_test_rope", type=str)
    args = parser.parse_args()

    if args.run_compile and args.offload:
        raise ValueError("cannot use both --compile and --offload flags")
    
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    log_filename_prefix = f"{args.result_dir}/{args.branch}_compile@{args.run_compile}_offload@{args.offload}"

    for model_name, ckpt_id in REPO_ID_MAPPING.items():
        pipeline = load_pipeline(model_name, args)
        call_kwargs = CALL_ARGS_MAPPING[model_name]

        def run_inference(pipeline, call_kwargs):
            _ = pipeline(
                **call_kwargs,
                generator=torch.Generator("cpu").manual_seed(0),
                )

        flush()
        print(f"[INFO] {model_name}: Running benchmark with: {vars(args)}\n")
        time = benchmark_fn(run_inference, pipeline, call_kwargs)  # in seconds.
        memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
        benchmark_info = BenchmarkInfo(time=time, memory=memory)
        flush()
        data_dict = {
            "pipeline_cls": model_name, 
            "repo_id": ckpt_id, 
            "time (secs)": benchmark_info.time,
            "memory (gbs)": benchmark_info.memory,
        }
        write_to_csv(log_filename_prefix + ".csv", data_dict)
        print(f"Log written to {log_filename_prefix + '.cvs'}")
        print(f"Memory: {benchmark_info.memory} gbs")
        print(f"Execution time: {benchmark_info.time} sec")

        out = pipeline(**call_kwargs, generator=torch.Generator("cpu").manual_seed(0), return_dict=False)[0][0]
        output_type = OUTPUT_TYPE_MAPPING[model_name]
        if output_type == "audio":
            import soundfile as sf
            out = out.T.float().cpu().numpy()
            out_file_name = log_filename_prefix + f"_{model_name}.wav"
            sf.write(out_file_name, out, pipeline.vae.sampling_rate)
        elif output_type == "video":
            from diffusers.utils import export_to_video
            out_file_name = log_filename_prefix + f"_{model_name}.mp4"
            export_to_video(out, out_file_name, fps=8)
        else:
            out_file_name = log_filename_prefix + f"_{model_name}.png"
            out.save(out_file_name)
        print(f" output saved to {out_file_name}")

@HuggingFaceDocBuilderDev

The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.

yiyixuxu merged commit 4f495b0 into main on Aug 29, 2024
18 checks passed
yiyixuxu deleted the fix-rope-not-real branch on August 29, 2024 09:31
sayakpaul added a commit that referenced this pull request Aug 29, 2024
rotary embedding refactor 2: update comments, fix dtype for use_real=False (#9312)

fix notes and dtype

up

up
sayakpaul added a commit that referenced this pull request Oct 21, 2024
* quantization config.

* fix-copies

* fix

* modules_to_not_convert

* add bitsandbytes utilities.

* make progress.

* fixes

* quality

* up

* up

rotary embedding refactor 2: update comments, fix dtype for use_real=False (#9312)

fix notes and dtype

up

up

* minor

* up

* up

* fix

* provide credits where due.

* make configurations work.

* fixes

* fix

* update_missing_keys

* fix

* fix

* make it work.

* fix

* provide credits to transformers.

* empty commit

* handle to() better.

* tests

* change to bnb from bitsandbytes

* fix tests

fix slow quality tests

SD3 remark

fix

complete int4 tests

add a readme to the test files.

add model cpu offload tests

warning test

* better safeguard.

* change merging status

* courtesy to transformers.

* move  upper.

* better

* make the unused kwargs warning friendlier.

* harmonize changes with huggingface/transformers#33122

* style

* trainin tests

* feedback part i.

* Add Flux inpainting and Flux Img2Img (#9135)

---------

Co-authored-by: yiyixuxu <[email protected]>

Update `UNet2DConditionModel`'s error messages (#9230)

* refactor

[CI] Update Single file Nightly Tests (#9357)

* update

* update

feedback.

improve README for flux dreambooth lora (#9290)

* improve readme

* improve readme

* improve readme

* improve readme

fix one uncaught deprecation warning for accessing vae_latent_channels in VaeImagePreprocessor (#9372)

deprecation warning vae_latent_channels

add mixed int8 tests and more tests to nf4.

[core] Freenoise memory improvements (#9262)

* update

* implement prompt interpolation

* make style

* resnet memory optimizations

* more memory optimizations; todo: refactor

* update

* update animatediff controlnet with latest changes

* refactor chunked inference changes

* remove print statements

* update

* chunk -> split

* remove changes from incorrect conflict resolution

* remove changes from incorrect conflict resolution

* add explanation of SplitInferenceModule

* update docs

* Revert "update docs"

This reverts commit c55a50a.

* update docstring for freenoise split inference

* apply suggestions from review

* add tests

* apply suggestions from review

quantization docs.

docs.

* Revert "Add Flux inpainting and Flux Img2Img (#9135)"

This reverts commit 5799954.

* tests

* don

* Apply suggestions from code review

Co-authored-by: Steven Liu <[email protected]>

* contribution guide.

* changes

* empty

* fix tests

* harmonize with huggingface/transformers#33546.

* numpy_cosine_distance

* config_dict modification.

* remove if config comment.

* note for load_state_dict changes.

* float8 check.

* quantizer.

* raise an error for non-True low_cpu_mem_usage values when using quant.

* low_cpu_mem_usage shenanigans when using fp32 modules.

* don't re-assign _pre_quantization_type.

* make comments clear.

* remove comments.

* handle mixed types better when moving to cpu.

* add tests to check if we're throwing warning rightly.

* better check.

* fix 8bit test_quality.

* handle dtype more robustly.

* better message when keep_in_fp32_modules.

* handle dtype casting.

* fix dtype checks in pipeline.

* fix warning message.

* Update src/diffusers/models/modeling_utils.py

Co-authored-by: YiYi Xu <[email protected]>

* mitigate the confusing cpu warning

---------

Co-authored-by: Vishnu V Jaddipal <[email protected]>
Co-authored-by: Steven Liu <[email protected]>
Co-authored-by: YiYi Xu <[email protected]>
sayakpaul pushed a commit that referenced this pull request Dec 23, 2024
sayakpaul added a commit that referenced this pull request Dec 23, 2024