Skip to content

Commit 0812689

Browse files
alexzms and github-actions[bot]
authored and committed
[add-new-model-like] Robust search & proper outer '),' in tokenizer mapping (huggingface#38703)
* [add-new-model-like] Robust search & proper outer '),' in tokenizer mapping
* code-style: arrange the imports in add_new_model_like.py
* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent b190dc2 commit 0812689

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

src/transformers/commands/add_new_model_like.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
# 1. Standard library
1516
import difflib
1617
import json
1718
import os
@@ -28,7 +29,12 @@
2829

2930
from ..models import auto as auto_module
3031
from ..models.auto.configuration_auto import model_type_to_module_name
31-
from ..utils import is_flax_available, is_tf_available, is_torch_available, logging
32+
from ..utils import (
33+
is_flax_available,
34+
is_tf_available,
35+
is_torch_available,
36+
logging,
37+
)
3238
from . import BaseTransformersCLICommand
3339
from .add_fast_image_processor import add_fast_image_processor
3440

@@ -1009,10 +1015,11 @@ def insert_tokenizer_in_auto_module(old_model_patterns: ModelPatterns, new_model
10091015
with open(TRANSFORMERS_PATH / "models" / "auto" / "tokenization_auto.py", "r", encoding="utf-8") as f:
10101016
content = f.read()
10111017

1018+
pattern_tokenizer = re.compile(r"^\s*TOKENIZER_MAPPING_NAMES\s*=\s*OrderedDict\b")
10121019
lines = content.split("\n")
10131020
idx = 0
10141021
# First we get to the TOKENIZER_MAPPING_NAMES block.
1015-
while not lines[idx].startswith(" TOKENIZER_MAPPING_NAMES = OrderedDict("):
1022+
while not pattern_tokenizer.search(lines[idx]):
10161023
idx += 1
10171024
idx += 1
10181025

@@ -1024,9 +1031,12 @@ def insert_tokenizer_in_auto_module(old_model_patterns: ModelPatterns, new_model
10241031
# Otherwise it takes several lines until we get to a "),"
10251032
else:
10261033
block = []
1027-
while not lines[idx].startswith(" ),"):
1034+
# should change to " )," instead of " ),"
1035+
while not lines[idx].startswith(" ),"):
10281036
block.append(lines[idx])
10291037
idx += 1
1038+
# if the lines[idx] does start with " )," we still need it in our block
1039+
block.append(lines[idx])
10301040
block = "\n".join(block)
10311041
idx += 1
10321042

0 commit comments

Comments (0)