Skip to content

Commit 303a82c

Browse files
committed
fix
1 parent 2aef9a9 commit 303a82c

File tree

1 file changed

+1
-5
lines changed

1 file changed

+1
-5
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -852,8 +852,6 @@ def __init__(self, verbose=True, **kwargs):
852852
continue
853853
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
854854
if key == "additional_special_tokens":
855-
# TODO THIS IS NASTY! Will always reset tokens to default rstrip and lstrip because self.set_attr on strings
856-
# will not check the addedtokens decoder. WILL FIX TOMORROW
857855
assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple"
858856
assert all(
859857
isinstance(t, (str, AddedToken)) for t in value
@@ -2204,8 +2202,6 @@ def _from_pretrained(
22042202
if str(token) in additional_special_tokens:
22052203
# at this point the token is in `additional_special_tokens` as an str, let's add the AddedToken info
22062204
additional_special_tokens.remove(str(token))
2207-
if token.special and token not in additional_special_tokens:
2208-
additional_special_tokens.append(token)
22092205
else:
22102206
raise ValueError(
22112207
f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary."
@@ -2438,7 +2434,7 @@ def save_pretrained(
24382434

24392435
# Sanitize AddedTokens in special_tokens_map
24402436

2441-
# kept for forward compatibility, will be removed in transoformers 5
2437+
# kept for forward compatibility, will be removed in transformers 5. Adding the type field
24422438
write_dict = self.convert_added_tokens(self.special_tokens_map_extended, add_type_field=True)
24432439
with open(special_tokens_map_file, "w", encoding="utf-8") as f:
24442440
out_str = json.dumps(write_dict, indent=2, sort_keys=True, ensure_ascii=False) + "\n"

0 commit comments

Comments
 (0)