@@ -852,8 +852,6 @@ def __init__(self, verbose=True, **kwargs):
852
852
continue
853
853
if key in self .SPECIAL_TOKENS_ATTRIBUTES :
854
854
if key == "additional_special_tokens" :
855
- # TODO THIS IS NASTY! Will always reset tokens to default rstrip and lstrip because self.set_attr on strings
856
- # will not check the addedtokens decoder. WILL FIX TOMORROW
857
855
assert isinstance (value , (list , tuple )), f"Value { value } is not a list or tuple"
858
856
assert all (
859
857
isinstance (t , (str , AddedToken )) for t in value
@@ -2204,8 +2202,6 @@ def _from_pretrained(
2204
2202
if str (token ) in additional_special_tokens :
2205
2203
# at this point the token is in `additional_special_tokens` as an str, let's add the AddedToken info
2206
2204
additional_special_tokens .remove (str (token ))
2207
- if token .special and token not in additional_special_tokens :
2208
- additional_special_tokens .append (token )
2209
2205
else :
2210
2206
raise ValueError (
2211
2207
f"Found a { token .__class__ } in the saved `added_tokens_decoder`, should be a dictionary."
@@ -2438,7 +2434,7 @@ def save_pretrained(
2438
2434
2439
2435
# Sanitize AddedTokens in special_tokens_map
2440
2436
2441
- # kept for forward compatibility, will be removed in transoformers 5
2437
+ # kept for forward compatibility, will be removed in transformers 5. Adding type field
2442
2438
write_dict = self .convert_added_tokens (self .special_tokens_map_extended , add_type_field = True )
2443
2439
with open (special_tokens_map_file , "w" , encoding = "utf-8" ) as f :
2444
2440
out_str = json .dumps (write_dict , indent = 2 , sort_keys = True , ensure_ascii = False ) + "\n "
0 commit comments