@@ -624,6 +624,7 @@ fn convert_harmony_args_to_tool_format(tool_name: &str, parsed: Value) -> Result
624624
625625fn parse_tagged_tool_call ( text : & str ) -> Option < ( String , Value ) > {
626626 const TOOL_TAG : & str = "<tool_call>" ;
627+ const TOOL_TAG_CLOSE : & str = "</tool_call>" ;
627628 const ARG_KEY_TAG : & str = "<arg_key>" ;
628629 const ARG_VALUE_TAG : & str = "<arg_value>" ;
629630 const ARG_KEY_CLOSE : & str = "</arg_key>" ;
@@ -640,7 +641,10 @@ fn parse_tagged_tool_call(text: &str) -> Option<(String, Value)> {
640641 let mut indexed_values: BTreeMap < String , BTreeMap < usize , Value > > = BTreeMap :: new ( ) ;
641642 rest = after_name;
642643
644+ // First, try standard <arg_key>/<arg_value> parsing
645+ let mut found_arg_tags = false ;
643646 while let Some ( key_index) = rest. find ( ARG_KEY_TAG ) {
647+ found_arg_tags = true ;
644648 rest = & rest[ key_index + ARG_KEY_TAG . len ( ) ..] ;
645649 let ( raw_key, mut after_key) = read_tag_text ( rest) ;
646650 if raw_key. is_empty ( ) {
@@ -674,6 +678,60 @@ fn parse_tagged_tool_call(text: &str) -> Option<(String, Value)> {
674678 }
675679 }
676680
681+ // If no arg tags found, try fallback parsing for malformed output
682+ // e.g., <tool_call>list_files<tool_call>{"path": "/tmp"} or <tool_call>read_file path="/tmp"
683+ if !found_arg_tags && object. is_empty ( ) {
684+ // Determine the content boundary (next <tool_call>, </tool_call>, or end)
685+ let content_end = after_name
686+ . find ( TOOL_TAG )
687+ . or_else ( || after_name. find ( TOOL_TAG_CLOSE ) )
688+ . unwrap_or ( after_name. len ( ) ) ;
689+ let content = after_name[ ..content_end] . trim ( ) ;
690+
691+ if !content. is_empty ( ) {
692+ // Try parsing as JSON first
693+ if let Some ( json_start) = content. find ( '{' ) {
694+ let json_content = & content[ json_start..] ;
695+ // Find matching closing brace
696+ let mut depth = 0 ;
697+ let mut json_end = None ;
698+ for ( idx, ch) in json_content. char_indices ( ) {
699+ match ch {
700+ '{' => depth += 1 ,
701+ '}' => {
702+ depth -= 1 ;
703+ if depth == 0 {
704+ json_end = Some ( idx + 1 ) ;
705+ break ;
706+ }
707+ }
708+ _ => { }
709+ }
710+ }
711+ if let Some ( end) = json_end {
712+ if let Ok ( parsed) = serde_json:: from_str :: < Value > ( & json_content[ ..end] ) {
713+ if let Some ( obj) = parsed. as_object ( ) {
714+ for ( k, v) in obj {
715+ object. insert ( k. clone ( ) , v. clone ( ) ) ;
716+ }
717+ }
718+ }
719+ }
720+ }
721+
722+ // If JSON parsing didn't work, try key=value or key:value pairs
723+ if object. is_empty ( ) {
724+ if let Some ( parsed) = parse_key_value_arguments ( content) {
725+ if let Some ( obj) = parsed. as_object ( ) {
726+ for ( k, v) in obj {
727+ object. insert ( k. clone ( ) , v. clone ( ) ) ;
728+ }
729+ }
730+ }
731+ }
732+ }
733+ }
734+
677735 for ( base, entries) in indexed_values {
678736 let offset = if entries. contains_key ( & 0 ) {
679737 0usize
@@ -1593,4 +1651,63 @@ mode: overwrite
15931651 "Should reject Harmony format with whitespace-only command"
15941652 ) ;
15951653 }
1654+
1655+ // ==================== Tests for malformed XML handling (GLM models) ====================
1656+
#[test]
fn test_parse_tagged_tool_call_handles_double_tag_malformed_xml() {
    // Malformed shape attributed to GLM models: a second opening <tool_call>
    // tag sits where the arguments would normally be.
    let (name, args) = parse_tagged_tool_call("<tool_call>list_files<tool_call>list")
        .expect("Should parse malformed double-tag XML");

    // The tool name must still be recovered.
    assert_eq!(name, "list_files");

    // No usable arguments exist in the input, so the result must not carry a
    // populated object (a non-object value is tolerated as well).
    let args_are_empty = match args.as_object() {
        Some(fields) => fields.is_empty(),
        None => true,
    };
    assert!(args_are_empty);
}
1669+
#[test]
fn test_parse_tagged_tool_call_extracts_json_after_name() {
    // A JSON object glued directly onto the tool name, with no separator.
    let input = r#"<tool_call>read_file{"path": "/tmp/test.txt"}</tool_call>"#;

    let (name, args) = parse_tagged_tool_call(input).expect("Should parse JSON after tool name");

    assert_eq!(name, "read_file");
    // The "path" entry of the JSON object must surface as a string argument.
    let path = args.get("path").and_then(|value| value.as_str());
    assert_eq!(path, Some("/tmp/test.txt"));
}
1680+
#[test]
fn test_parse_tagged_tool_call_extracts_json_with_space() {
    // Same as the glued-JSON case, but a single space separates the tool name
    // from the JSON object.
    let input = r#"<tool_call>read_file {"path": "/tmp/test.txt"}</tool_call>"#;

    let Some((name, args)) = parse_tagged_tool_call(input) else {
        panic!("Should parse JSON with space after tool name");
    };

    assert_eq!(name, "read_file");
    let path = args.get("path").and_then(|value| value.as_str());
    assert_eq!(path, Some("/tmp/test.txt"));
}
1691+
#[test]
fn test_parse_tagged_tool_call_handles_nested_json() {
    // The argument object contains another object, so the parser has to find
    // the outer closing brace rather than stopping at the first '}'.
    let input =
        r#"<tool_call>run_pty_cmd{"command": "echo", "env": {"PATH": "/usr/bin"}}</tool_call>"#;

    let (name, args) = parse_tagged_tool_call(input).expect("Should parse nested JSON");

    assert_eq!(name, "run_pty_cmd");

    // Top-level scalar argument.
    let command = args.get("command").and_then(|value| value.as_str());
    assert_eq!(command, Some("echo"));

    // The nested value must be present and must still be an object.
    let env_is_object = matches!(args.get("env"), Some(env) if env.is_object());
    assert!(env_is_object);
}
1703+
#[test]
fn test_parse_tagged_tool_call_stops_at_next_tool_call_tag() {
    // Two opening tags back to back: text after the second <tool_call> must
    // not leak into the first call's name.
    let parsed = parse_tagged_tool_call("<tool_call>list_files<tool_call>read_file");
    assert!(parsed.is_some());

    // Only the name is checked here; the arguments are intentionally ignored.
    let tool_name = parsed.map(|(name, _)| name);
    assert_eq!(tool_name.as_deref(), Some("list_files"));
}
15961713}
0 commit comments