File tree Expand file tree Collapse file tree 1 file changed +11
-8
lines changed
Expand file tree Collapse file tree 1 file changed +11
-8
lines changed Original file line number Diff line number Diff line change 44# This source code is licensed under the BSD-style license found in the
55# LICENSE file in the root directory of this source tree.
66
7- from xml .etree import ElementTree as ET
7+ import re
8+ from typing import Dict
89
910import torch
1011
1112from torchtune .modules .transforms .tokenizers import ModelTokenizer
1213
1314
14- def extract_tags (text : str ) -> dict [ str , list [ str ]] :
15+ def extract_tags (text : str ) -> Dict :
1516 """
1617 Parse XML-like tags from text. Returns a dictionary with keys 'think' and 'answer'.
1718 The values are lists of strings, with each string being the content of a tag.
1819 """
19- xml_string = f"<root>{ text } </root>"
20- root = ET .fromstring (xml_string )
21-
20+ think_pattern = r"<think>(.*?)</think>"
21+ answer_pattern = r"<answer>(.*?)</answer>"
22+ think_match = re .search (think_pattern , text , re .DOTALL )
23+ answer_match = re .search (answer_pattern , text , re .DOTALL )
24+ cot = think_match .group (1 ).strip () if think_match else ""
25+ potential_answer = answer_match .group (1 ).strip () if answer_match else ""
2226 return {
2327 "think" : [
24- elem . text if elem . text is not None else "" for elem in root . findall ( "think" )
28+ cot ,
2529 ],
2630 "answer" : [
27- elem .text if elem .text is not None else ""
28- for elem in root .findall ("answer" )
31+ potential_answer ,
2932 ],
3033 }
3134
You can’t perform that action at this time.
0 commit comments