Skip to content

Commit 0d9bd96

Browse files
authored
Merge branch 'main' into saraswatmks/gpt-refactor-quantization
2 parents dafa58b + c905a04 commit 0d9bd96

File tree

102 files changed

+233
-161
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+233
-161
lines changed

docs/scripts/gen_files.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def process_files(files: list[ProcessFile], project_root: Path):
4545
)
4646

4747
content = source_path.read_text(encoding="utf-8")
48-
48+
4949
# Only add frontmatter if title or weight are set
5050
if file.title is not None or file.weight is not None:
5151
frontmatter = "---\n"
@@ -91,10 +91,13 @@ def migrate_examples():
9191
project_root = find_project_root()
9292
examples_path = project_root / "examples"
9393
files = []
94-
94+
9595
# Find all README.md files 2 levels down (examples/EXAMPLE_NAME/README.md)
9696
for example_dir in examples_path.iterdir():
97-
if not example_dir.is_dir() or not (readme_path := example_dir / "README.md").exists():
97+
if (
98+
not example_dir.is_dir()
99+
or not (readme_path := example_dir / "README.md").exists()
100+
):
98101
continue
99102

100103
example_name = example_dir.name
@@ -106,7 +109,7 @@ def migrate_examples():
106109
weight=-5,
107110
)
108111
)
109-
112+
110113
process_files(files, project_root)
111114

112115

examples/autoround/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Load the model using `AutoModelForCausalLM` for handling quantized saving and lo
4040
from transformers import AutoTokenizer, AutoModelForCausalLM
4141

4242
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
43-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
43+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
4444
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
4545
```
4646

examples/autoround/llama3_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
# Select model and load it.
99
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
10-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
10+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
1111
tokenizer = AutoTokenizer.from_pretrained(model_id)
1212

1313
# Select calibration dataset.

examples/awq/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ recipe = [
1818
To use your own model, start with an existing example and change the `model_id` to match your own model stub.
1919
```python
2020
model_id = "path/to/your/model"
21-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
21+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
2222
```
2323

2424
## Adding Mappings ##

examples/awq/llama_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# Select model and load it.
99
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
1010

11-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
11+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
1212
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
1313

1414
# Select calibration dataset.

examples/awq/qwen3-vl-30b-a3b-Instruct-example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
# Load model.
1212
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
13-
MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
13+
MODEL_ID, dtype=torch.bfloat16, device_map=None, trust_remote_code=True
1414
)
1515
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
1616

examples/awq/qwen3_coder_moe_example.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
def get_calib_dataset(tokenizer):
3232
ds = load_dataset(
3333
DATASET_ID,
34-
split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES*10}]",
34+
split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES * 10}]",
3535
)
3636

3737
def preprocess(example):
@@ -51,7 +51,7 @@ def preprocess(example):
5151

5252

5353
if __name__ == "__main__":
54-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
54+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
5555
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
5656

5757
###

examples/awq/qwen3_moe_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# Select model and load it.
99
MODEL_ID = "Qwen/Qwen3-30B-A3B"
1010

11-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
11+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
1212
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
1313

1414
# Select calibration dataset.

examples/big_models_with_sequential_onloading/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Llama 3.3 70B is larger than 80 GB, exceeding the memory of a single A100. However,
1818

1919
```python
2020
model_id = "meta-llama/Llama-3.3-70B-Instruct"
21-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None)
21+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", device_map=None)
2222
```
2323

2424
The model is first loaded onto the `cpu`, as indicated through the use of `None` for the `device_map` argument in the `from_pretrained` method when loading the model.

examples/big_models_with_sequential_onloading/llama3.3_70b.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
model_id = "meta-llama/Llama-3.3-70B-Instruct"
1111
model = AutoModelForCausalLM.from_pretrained(
1212
model_id,
13-
torch_dtype="auto",
13+
dtype="auto",
1414
device_map=None,
1515
)
1616
tokenizer = AutoTokenizer.from_pretrained(model_id)

0 commit comments

Comments
 (0)