1 file changed: +3 -3 lines changed
@@ -310,7 +310,7 @@ def main(args: DataProcessArgs):
310310 print ("\033 [92mCategorizing training data type...\033 [0m" )
311311 data_with_input_ids = data_with_input_ids .map (
312312 lambda x : {
313- "is_pretrain" : get_sp_token (tokenizer , "<|pretrain|>" ) in x ["input_ids" ]
313+ "is_pretrain" : get_sp_token (tokenizer , "<|pretrain|>" )[ 0 ] in x ["input_ids" ]
314314 },
315315 num_proc = NUM_PROC ,
316316 )
@@ -320,8 +320,8 @@ def main(args: DataProcessArgs):
320320 user_tokens = user_tk ,
321321 assist_tokens = assistant_tk ,
322322 system_tokens = system_tk ,
323- pretrain_token = get_sp_token (tokenizer , "<|pretrain|>" ),
324- pretrain_end_token = get_sp_token (tokenizer , "<|/pretrain|>" ),
323+ pretrain_token = get_sp_token (tokenizer , "<|pretrain|>" )[ 0 ] ,
324+ pretrain_end_token = get_sp_token (tokenizer , "<|/pretrain|>" )[ 0 ] ,
325325 )
326326 print ("\033 [92munmasking the appropriate message content...\033 [0m" )
327327 data_with_labels = data_with_input_ids .map (
You can’t perform that action at this time.
0 commit comments