
Commit 793f90b

Merge branch 'main' into fix-add-new-model-like-tokenizer
2 parents: af9e504 + 19224c3

File tree: 75 files changed, +91 -91 lines changed


docs/source/en/internal/import_utils.md

Lines changed: 1 addition & 1 deletion

@@ -38,7 +38,7 @@ However, no method can be called on that object:
 ```python
 >>> DetrImageProcessorFast.from_pretrained()
 ImportError:
-DetrImageProcessorFast requires the Torchvision library but it was not found in your environment. Checkout the instructions on the
+DetrImageProcessorFast requires the Torchvision library but it was not found in your environment. Check out the instructions on the
 installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
 Please note that you may need to restart your runtime after installation.
 ```
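The documentation being fixed here describes how a class whose optional backend (torchvision) is missing still imports, but raises at call time. A hedged sketch of that placeholder pattern; the helper class below is illustrative only and is not the actual transformers implementation:

```python
# Minimal sketch of a placeholder-object pattern (illustrative only, not the
# real transformers internals). The idea: when an optional backend such as
# torchvision is missing, expose a stand-in whose every attribute access
# raises a helpful ImportError instead of failing at import time.
class _MissingBackendPlaceholder:
    def __init__(self, class_name: str, backend: str, hint: str):
        self._class_name = class_name
        self._backend = backend
        self._hint = hint

    def __getattr__(self, name):
        raise ImportError(
            f"{self._class_name} requires the {self._backend} library but it was not "
            f"found in your environment. Check out the instructions on the "
            f"installation page: {self._hint}"
        )


# Hypothetical stand-in for DetrImageProcessorFast when torchvision is absent.
DetrImageProcessorFastStub = _MissingBackendPlaceholder(
    "DetrImageProcessorFast", "Torchvision", "https://pytorch.org/get-started/locally/"
)
# DetrImageProcessorFastStub.from_pretrained()  # -> ImportError with the message above
```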

examples/flax/question-answering/run_qa.py

Lines changed: 1 addition & 1 deletion

@@ -546,7 +546,7 @@ def main():
     # region Tokenizer check: this script requires a fast tokenizer.
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
         raise ValueError(
-            "This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
+            "This example script only works for models that have a fast tokenizer. Check out the big table of models at"
             " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
             " this requirement"
         )
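The same fast-tokenizer guard (and the same Checkout → Check out fix) appears again in the PyTorch and TensorFlow QA scripts and the NER script below. As a standalone illustration of what the guard enforces, here is a hedged sketch; the checkpoint name is an arbitrary example:

```python
from transformers import AutoTokenizer, PreTrainedTokenizerFast

# Load a tokenizer and confirm it is a "fast" (Rust-backed) tokenizer before
# relying on fast-only features such as offset mappings.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # example checkpoint

if not isinstance(tokenizer, PreTrainedTokenizerFast):
    raise ValueError(
        "This example script only works for models that have a fast tokenizer. Check out the big table of models at"
        " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
        " this requirement"
    )

print(tokenizer.is_fast)  # True when the Rust tokenizers backend is in use
```

The fast-tokenizer requirement exists because these examples use offset mappings to align answer spans with tokens, a feature only the Rust-backed tokenizers provide.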

examples/modular-transformers/configuration_my_new_model.py

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ class MyNewModelConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
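This GQA docstring recurs across many of the model configs touched below. It describes converting a multi-head checkpoint to a grouped-query checkpoint by mean-pooling the key/value heads within each group. A minimal sketch of that pooling, assuming the usual `(num_heads * head_dim, hidden_size)` projection-weight layout; names and shapes are illustrative:

```python
import torch


def meanpool_kv_heads(kv_weight: torch.Tensor, num_heads: int, num_kv_heads: int) -> torch.Tensor:
    """Mean-pool a k_proj / v_proj weight from `num_heads` heads down to `num_kv_heads` groups."""
    assert num_heads % num_kv_heads == 0, "query heads must divide evenly into key/value groups"
    out_dim, hidden_size = kv_weight.shape
    head_dim = out_dim // num_heads
    group_size = num_heads // num_kv_heads

    # Split into (num_kv_heads, group_size, head_dim, hidden_size) and average
    # the heads inside each group, then flatten back to a projection weight.
    grouped = kv_weight.view(num_kv_heads, group_size, head_dim, hidden_size)
    pooled = grouped.mean(dim=1)
    return pooled.reshape(num_kv_heads * head_dim, hidden_size)


# Toy example: 8 key/value heads of size 64, pooled into 2 GQA groups.
k_proj = torch.randn(8 * 64, 512)
print(meanpool_kv_heads(k_proj, num_heads=8, num_kv_heads=2).shape)  # torch.Size([128, 512])
```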

examples/modular-transformers/configuration_new_model.py

Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ class NewModelConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 head_dim (`int`, *optional*, defaults to 256):

examples/pytorch/question-answering/run_qa.py

Lines changed: 1 addition & 1 deletion

@@ -357,7 +357,7 @@ def main():
     # Tokenizer check: this script requires a fast tokenizer.
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
         raise ValueError(
-            "This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
+            "This example script only works for models that have a fast tokenizer. Check out the big table of models at"
             " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
             " this requirement"
         )

examples/pytorch/token-classification/run_ner.py

Lines changed: 1 addition & 1 deletion

@@ -399,7 +399,7 @@ def get_label_list(labels):
     # Tokenizer check: this script requires a fast tokenizer.
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
         raise ValueError(
-            "This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
+            "This example script only works for models that have a fast tokenizer. Check out the big table of models at"
             " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
             " this requirement"
         )

examples/tensorflow/question-answering/run_qa.py

Lines changed: 1 addition & 1 deletion

@@ -378,7 +378,7 @@ def main():
     # region Tokenizer check: this script requires a fast tokenizer.
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
         raise ValueError(
-            "This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
+            "This example script only works for models that have a fast tokenizer. Check out the big table of models at"
             " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
            " this requirement"
         )

src/transformers/models/aria/configuration_aria.py

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ class AriaTextConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/aria/modular_aria.py

Lines changed: 1 addition & 1 deletion

@@ -120,7 +120,7 @@ class AriaTextConfig(LlamaConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/bamba/configuration_bamba.py

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ class BambaConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
 The non-linear activation function (function or string) in the decoder.

src/transformers/models/bitnet/configuration_bitnet.py

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ class BitNetConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"relu2"`):

src/transformers/models/chameleon/configuration_chameleon.py

Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ class ChameleonConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py

Lines changed: 1 addition & 1 deletion

@@ -446,7 +446,7 @@ def main():
         "--model_size",
         choices=["7B", "30B"],
         help=""
-        " models correspond to the finetuned versions, and are specific to the Chameleon official release. For more details on Chameleon, checkout the original repo: https://github.com/facebookresearch/chameleon",
+        " models correspond to the finetuned versions, and are specific to the Chameleon official release. For more details on Chameleon, check out the original repo: https://github.com/facebookresearch/chameleon",
     )
     parser.add_argument(
         "--output_dir",

src/transformers/models/cohere/configuration_cohere.py

Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ class CohereConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/cohere2/configuration_cohere2.py

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ class Cohere2Config(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/cohere2/modular_cohere2.py

Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ class Cohere2Config(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/csm/configuration_csm.py

Lines changed: 2 additions & 2 deletions

@@ -54,7 +54,7 @@ class CsmDepthDecoderConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
@@ -235,7 +235,7 @@ class CsmConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf).
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
 The non-linear activation function (function or string) in the backbone model Transformer decoder.

src/transformers/models/deepseek_v3/configuration_deepseek_v3.py

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ class DeepseekV3Config(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 n_shared_experts (`int`, *optional*, defaults to 1):

src/transformers/models/diffllama/configuration_diffllama.py

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ class DiffLlamaConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/emu3/configuration_emu3.py

Lines changed: 1 addition & 1 deletion

@@ -138,7 +138,7 @@ class Emu3TextConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):

src/transformers/models/falcon_h1/configuration_falcon_h1.py

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ class FalconH1Config(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
 hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
 The non-linear activation function (function or string) in the decoder.

src/transformers/models/gemma/configuration_gemma.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ class GemmaConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 head_dim (`int`, *optional*, defaults to 256):
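Since this docstring recurs verbatim across the Gemma-family configs below, here is a short hedged sketch of how `num_key_value_heads` selects the attention variant it describes; the values are arbitrary:

```python
from transformers import GemmaConfig

# GQA: 16 query heads share 4 key/value heads (each group of 4 query heads
# attends with one key/value head).
gqa_config = GemmaConfig(num_attention_heads=16, num_key_value_heads=4)

# MQA: a single key/value head shared by all query heads.
mqa_config = GemmaConfig(num_attention_heads=16, num_key_value_heads=1)

# MHA: one key/value head per query head (num_key_value_heads == num_attention_heads).
mha_config = GemmaConfig(num_attention_heads=16, num_key_value_heads=16)

print(gqa_config.num_key_value_heads, mqa_config.num_key_value_heads, mha_config.num_key_value_heads)
```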

src/transformers/models/gemma/convert_gemma_weights_to_hf.py

Lines changed: 1 addition & 1 deletion

@@ -151,7 +151,7 @@ def main():
         "--model_size",
         default="7B",
         choices=["2B", "7B", "tokenizer_only"],
-        help="'f' models correspond to the finetuned versions, and are specific to the Gemma2 official release. For more details on Gemma2, checkout the original repo: https://huggingface.co/google/gemma-7b",
+        help="'f' models correspond to the finetuned versions, and are specific to the Gemma2 official release. For more details on Gemma2, check out the original repo: https://huggingface.co/google/gemma-7b",
     )
     parser.add_argument(
         "--output_dir",

src/transformers/models/gemma/modular_gemma.py

Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ class GemmaConfig(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 head_dim (`int`, *optional*, defaults to 256):

src/transformers/models/gemma2/configuration_gemma2.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ class Gemma2Config(PretrainedConfig):
 `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
 `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
 converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-by meanpooling all the original heads within that group. For more details checkout [this
+by meanpooling all the original heads within that group. For more details, check out [this
 paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
 `num_attention_heads`.
 head_dim (`int`, *optional*, defaults to 256):

src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py

Lines changed: 1 addition & 1 deletion

@@ -184,7 +184,7 @@ def main():
         "--model_size",
         default="9B",
         choices=["9B", "27B", "tokenizer_only"],
-        help="'f' models correspond to the finetuned versions, and are specific to the Gemma22 official release. For more details on Gemma2, checkout the original repo: https://huggingface.co/google/gemma-7b",
+        help="'f' models correspond to the finetuned versions, and are specific to the Gemma22 official release. For more details on Gemma2, check out the original repo: https://huggingface.co/google/gemma-7b",
     )
     parser.add_argument(
         "--output_dir",
