Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/agent/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ pub(crate) fn fast_trim_tool_results(
}

/// Emergency: drop oldest non-system, non-recent messages from history.
/// Tool groups (assistant + consecutive tool messages) are dropped
/// atomically to preserve tool_use/tool_result pairing.
/// Returns number of messages dropped.
pub(crate) fn emergency_history_trim(
history: &mut Vec<crate::providers::ChatMessage>,
Expand All @@ -87,6 +89,18 @@ pub(crate) fn emergency_history_trim(
while dropped < target_drop && i < history.len().saturating_sub(keep_recent) {
if history[i].role == "system" {
i += 1;
} else if history[i].role == "assistant" {
// Count following tool messages — drop as atomic group
let mut tool_count = 0;
while i + 1 + tool_count < history.len().saturating_sub(keep_recent)
&& history[i + 1 + tool_count].role == "tool"
{
tool_count += 1;
}
for _ in 0..=tool_count {
history.remove(i);
dropped += 1;
}
Comment on lines +92 to +103
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Don't split a tool group at the keep_recent boundary.

When keep_recent starts between two tool results, Line 95 stops counting at the cutoff but Lines 100-103 still remove the assistant and the unprotected prefix. That leaves the protected suffix orphaned, and src/agent/loop_.rs:2800-2840 retries immediately on dropped > 0, so the overflow-recovery path can still resend invalid history.

🛠️ Suggested fix
         } else if history[i].role == "assistant" {
-            // Count following tool messages — drop as atomic group
-            let mut tool_count = 0;
-            while i + 1 + tool_count < history.len().saturating_sub(keep_recent)
-                && history[i + 1 + tool_count].role == "tool"
-            {
-                tool_count += 1;
-            }
-            for _ in 0..=tool_count {
-                history.remove(i);
-                dropped += 1;
-            }
+            let mut group_end = i + 1;
+            while group_end < history.len() && history[group_end].role == "tool" {
+                group_end += 1;
+            }
+
+            let recent_start = history.len().saturating_sub(keep_recent);
+            if group_end > recent_start {
+                i = group_end;
+                continue;
+            }
+
+            dropped += group_end - i;
+            drop(history.drain(i..group_end));
         } else {
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agent/history.rs` around lines 92 - 103, The removal loop for
assistant+tool groups can split a tool-result group at the keep_recent cutoff;
to fix, compute the cutoff as history.len().saturating_sub(keep_recent) and only
count/remove the assistant plus trailing tool messages if the entire group lies
strictly before that cutoff: in the block using variables history, i,
tool_count, keep_recent and dropped, advance tool_count as you do but then check
if (i + 1 + tool_count) <= cutoff (or that the last tool index < cutoff) before
performing the for-loop of history.remove; if the group crosses into the
protected suffix, skip removal (and advance i appropriately) so you never orphan
the protected suffix or increment dropped for a partial group.

} else {
history.remove(i);
dropped += 1;
Expand Down
251 changes: 223 additions & 28 deletions src/agent/history_pruner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,44 +101,79 @@ pub fn prune_history(messages: &mut Vec<ChatMessage>, config: &HistoryPrunerConf

let mut collapsed_pairs: usize = 0;

// Phase 1 – collapse assistant+tool pairs
// Phase 1 – collapse assistant+tool groups atomically.
// An assistant message followed by one or more consecutive tool messages
// forms an atomic group (tool_use + tool_result pairing). Collapsing only
// part of the group would orphan tool_use blocks, causing API 400 errors
// from providers that enforce pairing (e.g., Anthropic).
if config.collapse_tool_results {
let mut i = 0;
while i + 1 < messages.len() {
while i < messages.len() {
let protected = protected_indices(messages, config.keep_recent);
if messages[i].role == "assistant"
&& messages[i + 1].role == "tool"
&& !protected[i]
&& !protected[i + 1]
{
let tool_content = &messages[i + 1].content;
let truncated: String = tool_content.chars().take(100).collect();
let summary = format!("[Tool result: {truncated}...]");
messages[i] = ChatMessage {
role: "assistant".to_string(),
content: summary,
};
messages.remove(i + 1);
collapsed_pairs += 1;
} else {
i += 1;
if messages[i].role == "assistant" && !protected[i] {
// Count consecutive tool messages following this assistant
let mut tool_count = 0;
while i + 1 + tool_count < messages.len()
&& messages[i + 1 + tool_count].role == "tool"
&& !protected[i + 1 + tool_count]
{
tool_count += 1;
}
if tool_count > 0 {
let summary =
format!("[Tool exchange: {tool_count} tool call(s) — results collapsed]");
messages[i] = ChatMessage {
role: "assistant".to_string(),
content: summary,
};
for _ in 0..tool_count {
messages.remove(i + 1);
}
collapsed_pairs += tool_count;
continue;
Comment on lines +113 to +133
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Avoid partial collapse when a multi-tool group straddles keep_recent.

Line 116 only counts unprotected tools. If the cutoff lands inside a multi-tool run, Lines 123-131 replace the original assistant with a summary and remove only the unprotected prefix. The remaining protected tool message(s) now follow a synthetic summary instead of the original tool-call payload, which can still violate provider pairing rules.

🛠️ Suggested fix
             if messages[i].role == "assistant" && !protected[i] {
-                // Count consecutive tool messages following this assistant
-                let mut tool_count = 0;
-                while i + 1 + tool_count < messages.len()
-                    && messages[i + 1 + tool_count].role == "tool"
-                    && !protected[i + 1 + tool_count]
-                {
-                    tool_count += 1;
-                }
-                if tool_count > 0 {
+                let mut group_end = i + 1;
+                while group_end < messages.len() && messages[group_end].role == "tool" {
+                    group_end += 1;
+                }
+
+                if group_end > i + 1 {
+                    if protected[i + 1..group_end].iter().any(|p| *p) {
+                        i = group_end;
+                        continue;
+                    }
+
+                    let tool_count = group_end - i - 1;
                     let summary =
                         format!("[Tool exchange: {tool_count} tool call(s) — results collapsed]");
                     messages[i] = ChatMessage {
                         role: "assistant".to_string(),
                         content: summary,
                     };
-                    for _ in 0..tool_count {
-                        messages.remove(i + 1);
-                    }
+                    drop(messages.drain(i + 1..group_end));
                     collapsed_pairs += tool_count;
                     continue;
                 }
             }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agent/history_pruner.rs` around lines 113 - 133, The collapse logic
currently counts only an unprotected prefix of consecutive tool messages and
then replaces the assistant with a summary while leaving protected tool messages
after that summary; change the algorithm so you first scan the full consecutive
tool run (starting at i+1) to determine its length, then check whether every
message in that run is unprotected before performing the collapse; if any tool
in the run is protected, do not replace messages[i] or remove any messages in
that run. Update the code around variables/messages: messages[i], tool_count,
protected, collapsed_pairs and the ChatMessage replacement so collapsing happens
only when the entire tool run is unprotected.

}
}
i += 1;
}
}

// Phase 2 – budget enforcement
// Phase 2 – budget enforcement: drop messages to fit token budget.
// Tool groups (assistant + consecutive tool messages) are dropped
// atomically to preserve tool_use/tool_result pairing.
let mut dropped_messages: usize = 0;
while estimate_tokens(messages) > config.max_tokens {
let protected = protected_indices(messages, config.keep_recent);
if let Some(idx) = protected
.iter()
.enumerate()
.find(|&(_, &p)| !p)
.map(|(i, _)| i)
{
messages.remove(idx);
let mut dropped_any = false;
let mut i = 0;
while i < messages.len() {
if protected[i] {
i += 1;
continue;
}
if messages[i].role == "assistant" {
// Count following tool messages — drop as atomic group
let mut tool_count = 0;
while i + 1 + tool_count < messages.len()
&& messages[i + 1 + tool_count].role == "tool"
{
tool_count += 1;
}
if tool_count > 0 {
for _ in 0..=tool_count {
messages.remove(i);
}
dropped_messages += 1 + tool_count;
dropped_any = true;
break;
Comment on lines +153 to +167
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't let group dropping override keep_recent.

Lines 156-164 remove every consecutive tool after an unprotected assistant without consulting protected. If the recent window starts inside that run, Phase 2 deletes protected tool results just to keep the group atomic, which breaks the keep_recent contract.

🛠️ Suggested fix
             if messages[i].role == "assistant" {
-                // Count following tool messages — drop as atomic group
-                let mut tool_count = 0;
-                while i + 1 + tool_count < messages.len()
-                    && messages[i + 1 + tool_count].role == "tool"
-                {
-                    tool_count += 1;
-                }
-                if tool_count > 0 {
-                    for _ in 0..=tool_count {
-                        messages.remove(i);
-                    }
-                    dropped_messages += 1 + tool_count;
+                let mut group_end = i + 1;
+                while group_end < messages.len() && messages[group_end].role == "tool" {
+                    group_end += 1;
+                }
+                if group_end > i + 1 {
+                    if protected[i + 1..group_end].iter().any(|p| *p) {
+                        i = group_end;
+                        continue;
+                    }
+
+                    dropped_messages += group_end - i;
+                    drop(messages.drain(i..group_end));
                     dropped_any = true;
                     break;
                 }
             }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if messages[i].role == "assistant" {
// Count following tool messages — drop as atomic group
let mut tool_count = 0;
while i + 1 + tool_count < messages.len()
&& messages[i + 1 + tool_count].role == "tool"
{
tool_count += 1;
}
if tool_count > 0 {
for _ in 0..=tool_count {
messages.remove(i);
}
dropped_messages += 1 + tool_count;
dropped_any = true;
break;
if messages[i].role == "assistant" {
let mut group_end = i + 1;
while group_end < messages.len() && messages[group_end].role == "tool" {
group_end += 1;
}
if group_end > i + 1 {
if protected[i + 1..group_end].iter().any(|p| *p) {
i = group_end;
continue;
}
dropped_messages += group_end - i;
drop(messages.drain(i..group_end));
dropped_any = true;
break;
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agent/history_pruner.rs` around lines 153 - 167, The current
assistant-group drop logic in the block checking if messages[i].role ==
"assistant" removes the assistant plus all following "tool" messages without
checking the protected/keep_recent boundary; change it so you only remove the
group if every message in that assistant+tool run has index >= protected (i.e.,
is unprotected). Concretely, compute the tool run as you do with tool_count,
then verify that i and i+1..i+tool_count are all >= protected before removing
any entries; if any message in that group is protected, skip dropping the entire
group (do not remove partial messages), leaving messages, dropped_messages, and
dropped_any unchanged for that group. Reference symbols: messages, protected,
tool_count, dropped_messages, dropped_any in the assistant-role removal branch.

}
}
// Non-tool-group message — safe to drop individually
messages.remove(i);
dropped_messages += 1;
} else {
dropped_any = true;
break;
}
if !dropped_any {
break;
}
}
Expand Down Expand Up @@ -220,7 +255,7 @@ mod tests {
assert_eq!(stats.collapsed_pairs, 1);
assert_eq!(messages.len(), 4);
assert_eq!(messages[1].role, "assistant");
assert!(messages[1].content.starts_with("[Tool result: "));
assert!(messages[1].content.contains("1 tool call(s)"));
}

#[test]
Expand Down Expand Up @@ -280,4 +315,164 @@ mod tests {
assert_eq!(stats.messages_before, 0);
assert_eq!(stats.messages_after, 0);
}

#[test]
fn prune_collapses_multi_tool_group() {
let mut messages = vec![
msg("system", "sys"),
msg(
"assistant",
r#"{"content":null,"tool_calls":[{"id":"t1","name":"shell","arguments":"{}"},{"id":"t2","name":"web","arguments":"{}"}]}"#,
),
msg("tool", r#"{"tool_call_id":"t1","content":"result1"}"#),
msg("tool", r#"{"tool_call_id":"t2","content":"result2"}"#),
msg("user", "thanks"),
msg("assistant", "done"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100_000,
keep_recent: 2,
collapse_tool_results: true,
};
let stats = prune_history(&mut messages, &config);
assert_eq!(stats.collapsed_pairs, 2);
// assistant(tool_calls) + 2 tool messages -> 1 summary assistant
assert_eq!(messages.len(), 4); // sys, summary, user, assistant
assert!(messages[1].content.contains("2 tool call(s)"));
// No tool messages remain
assert!(!messages.iter().any(|m| m.role == "tool"));
}

#[test]
fn prune_drops_tool_group_atomically() {
let big = "x".repeat(2000);
let mut messages = vec![
msg("system", "sys"),
msg("assistant", &big),
msg("tool", &big),
msg("tool", &big),
msg("user", "recent"),
msg("assistant", "recent reply"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 50, // very low — forces drops
keep_recent: 2,
collapse_tool_results: false, // skip collapse, go straight to drop
};
let stats = prune_history(&mut messages, &config);
assert!(stats.dropped_messages >= 3); // assistant + 2 tools dropped together
// No orphaned tool messages
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"tool message at index {i} has no preceding assistant"
);
}
}
Comment on lines +367 to +374
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

The orphan invariant is too strict for multi-tool groups.

This PR explicitly supports assistant + N consecutive tool exchanges, but these loops require every remaining tool message to be immediately preceded by assistant. That will fail on a valid protected multi-tool group and makes it impossible to add the boundary-straddle regression that would catch the current keep_recent bug.

Also applies to: 401-409, 468-476

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agent/history_pruner.rs` around lines 367 - 374, The loop enforcing
"tool" messages must be immediately preceded by an "assistant" is too strict for
multi-tool groups; update the check in the iterations over messages (the for (i,
m) in messages.iter().enumerate() loop and the analogous checks at the other
spots noted) so that a "tool" is allowed if the previous message is also a
"tool" (i.e., only assert failure if i == 0 or the previous message exists and
is neither "assistant" nor "tool"); in practice change the assertion to: if
m.role == "tool" then ensure i > 0 and (messages[i-1].role == "assistant" ||
messages[i-1].role == "tool"), otherwise panic — apply the same logic to the
other two loops referenced (lines ~401-409 and ~468-476) so multi-tool
consecutive groups are accepted.

}

#[test]
fn prune_never_orphans_tool_use() {
// Simulate a conversation with multiple tool groups
let filler = "y".repeat(500);
let mut messages = vec![
msg("system", "sys"),
msg("user", "q1"),
msg("assistant", &filler), // tool group 1
msg("tool", &filler),
msg("user", "q2"),
msg("assistant", &filler), // tool group 2
msg("tool", &filler),
msg("tool", &filler),
msg("user", "recent"),
msg("assistant", "recent reply"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100,
keep_recent: 2,
collapse_tool_results: true,
};
prune_history(&mut messages, &config);
// Verify invariant: no tool message without a preceding assistant
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"orphaned tool message at index {i}: {:?}",
messages.iter().map(|m| &m.role).collect::<Vec<_>>()
);
}
}
}

#[test]
fn prune_protects_recent_tool_groups() {
let mut messages = vec![
msg("system", "sys"),
msg("user", "old"),
msg("assistant", "old reply"),
msg("assistant", "tool call"),
msg("tool", "tool result"),
msg("user", "recent"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100_000,
keep_recent: 3, // protects last 3: tool call, tool result, recent
collapse_tool_results: true,
};
let stats = prune_history(&mut messages, &config);
// Protected tool group should not be collapsed
assert!(messages.iter().any(|m| m.role == "tool"));
assert_eq!(stats.collapsed_pairs, 0);
}

#[test]
fn prune_under_realistic_token_pressure_preserves_tool_pairing() {
// Simulate 15 tool iterations with realistic content sizes
let mut messages = vec![msg("system", "You are helpful.")];
messages.push(msg("user", "Research this topic thoroughly"));

// 15 tool iterations — each adds assistant(tool_calls) + tool(result)
for i in 0..15 {
let tool_json = format!(
r#"{{"content":"iteration {i}","tool_calls":[{{"id":"t{i}","name":"web_search","arguments":"{{}}"}}]}}"#
);
messages.push(msg("assistant", &tool_json));
// Realistic tool result size (~2K chars each)
let result = format!(
r#"{{"tool_call_id":"t{i}","content":"{}"}}"#,
"x".repeat(2000)
);
messages.push(msg("tool", &result));
}
messages.push(msg("assistant", "Here's what I found..."));

// 33 messages total: system + user + 15*(assistant+tool) + final assistant
assert_eq!(messages.len(), 33);

let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 2000, // Forces pruning of older iterations
keep_recent: 4,
collapse_tool_results: true,
};

prune_history(&mut messages, &config);

// Invariant: no orphaned tool messages after pruning
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"orphaned tool at index {i}: roles = {:?}",
messages.iter().map(|m| &m.role).collect::<Vec<_>>()
);
}
}
}
}
Loading