Skip to content

Commit 0d9bd96

Browse files
authored
Merge branch 'main' into saraswatmks/gpt-refactor-quantization
2 parents dafa58b + c905a04 commit 0d9bd96

File tree

102 files changed

+233
-161
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+233
-161
lines changed

docs/scripts/gen_files.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def process_files(files: list[ProcessFile], project_root: Path):
4545
)
4646

4747
content = source_path.read_text(encoding="utf-8")
48-
48+
4949
# Only add frontmatter if title or weight are set
5050
if file.title is not None or file.weight is not None:
5151
frontmatter = "---\n"
@@ -91,10 +91,13 @@ def migrate_examples():
9191
project_root = find_project_root()
9292
examples_path = project_root / "examples"
9393
files = []
94-
94+
9595
# Find all README.md files 2 levels down (examples/EXAMPLE_NAME/README.md)
9696
for example_dir in examples_path.iterdir():
97-
if not example_dir.is_dir() or not (readme_path := example_dir / "README.md").exists():
97+
if (
98+
not example_dir.is_dir()
99+
or not (readme_path := example_dir / "README.md").exists()
100+
):
98101
continue
99102

100103
example_name = example_dir.name
@@ -106,7 +109,7 @@ def migrate_examples():
106109
weight=-5,
107110
)
108111
)
109-
112+
110113
process_files(files, project_root)
111114

112115

examples/autoround/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Load the model using `AutoModelForCausalLM` for handling quantized saving and lo
4040
from transformers import AutoTokenizer, AutoModelForCausalLM
4141

4242
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
43-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
43+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
4444
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
4545
```
4646

examples/autoround/llama3_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
# Select model and load it.
99
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
10-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
10+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
1111
tokenizer = AutoTokenizer.from_pretrained(model_id)
1212

1313
# Select calibration dataset.

examples/awq/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ recipe = [
1818
To use your own model, start with an existing example and change the `model_id` to match your own model stub.
1919
```python
2020
model_id = "path/to/your/model"
21-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
21+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
2222
```
2323

2424
## Adding Mappings ##

examples/awq/llama_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# Select model and load it.
99
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
1010

11-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
11+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
1212
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
1313

1414
# Select calibration dataset.

examples/awq/qwen3-vl-30b-a3b-Instruct-example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
# Load model.
1212
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
13-
MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
13+
MODEL_ID, dtype=torch.bfloat16, device_map=None, trust_remote_code=True
1414
)
1515
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
1616

examples/awq/qwen3_coder_moe_example.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
def get_calib_dataset(tokenizer):
3232
ds = load_dataset(
3333
DATASET_ID,
34-
split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES*10}]",
34+
split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES * 10}]",
3535
)
3636

3737
def preprocess(example):
@@ -51,7 +51,7 @@ def preprocess(example):
5151

5252

5353
if __name__ == "__main__":
54-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
54+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
5555
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
5656

5757
###

examples/awq/qwen3_moe_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# Select model and load it.
99
MODEL_ID = "Qwen/Qwen3-30B-A3B"
1010

11-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
11+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
1212
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
1313

1414
# Select calibration dataset.

examples/big_models_with_sequential_onloading/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Llama 3.3 70B is larger than 80 GB, exceeding the memory of a single A100. However,
1818

1919
```python
2020
model_id = "meta-llama/Llama-3.3-70B-Instruct"
21-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None)
21+
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", device_map=None)
2222
```
2323

2424
The model is first loaded onto the `cpu`, as indicated through the use of `None` for the `device_map` argument in the `from_pretrained` method when loading the model.

examples/big_models_with_sequential_onloading/llama3.3_70b.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
model_id = "meta-llama/Llama-3.3-70B-Instruct"
1111
model = AutoModelForCausalLM.from_pretrained(
1212
model_id,
13-
torch_dtype="auto",
13+
dtype="auto",
1414
device_map=None,
1515
)
1616
tokenizer = AutoTokenizer.from_pretrained(model_id)

0 commit comments

Comments
 (0)