Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/agent/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ pub(crate) fn fast_trim_tool_results(
}

/// Emergency: drop oldest non-system, non-recent messages from history.
/// Tool groups (assistant + consecutive tool messages) are dropped
/// atomically to preserve tool_use/tool_result pairing. See #4810.
/// Returns number of messages dropped.
pub(crate) fn emergency_history_trim(
history: &mut Vec<crate::providers::ChatMessage>,
Expand All @@ -87,6 +89,18 @@ pub(crate) fn emergency_history_trim(
while dropped < target_drop && i < history.len().saturating_sub(keep_recent) {
if history[i].role == "system" {
i += 1;
} else if history[i].role == "assistant" {
// Count following tool messages — drop as atomic group
let mut tool_count = 0;
while i + 1 + tool_count < history.len().saturating_sub(keep_recent)
&& history[i + 1 + tool_count].role == "tool"
{
tool_count += 1;
}
for _ in 0..=tool_count {
history.remove(i);
dropped += 1;
}
} else {
history.remove(i);
dropped += 1;
Expand Down
251 changes: 223 additions & 28 deletions src/agent/history_pruner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,44 +101,79 @@ pub fn prune_history(messages: &mut Vec<ChatMessage>, config: &HistoryPrunerConf

let mut collapsed_pairs: usize = 0;

// Phase 1 – collapse assistant+tool pairs
// Phase 1 – collapse assistant+tool groups atomically.
// An assistant message followed by one or more consecutive tool messages
// forms an atomic group (tool_use + tool_result pairing). Collapsing only
// part of the group would orphan tool_use blocks, causing API 400 errors
// from providers that enforce pairing (e.g., Anthropic). See #4810.
if config.collapse_tool_results {
let mut i = 0;
while i + 1 < messages.len() {
while i < messages.len() {
let protected = protected_indices(messages, config.keep_recent);
if messages[i].role == "assistant"
&& messages[i + 1].role == "tool"
&& !protected[i]
&& !protected[i + 1]
{
let tool_content = &messages[i + 1].content;
let truncated: String = tool_content.chars().take(100).collect();
let summary = format!("[Tool result: {truncated}...]");
messages[i] = ChatMessage {
role: "assistant".to_string(),
content: summary,
};
messages.remove(i + 1);
collapsed_pairs += 1;
} else {
i += 1;
if messages[i].role == "assistant" && !protected[i] {
// Count consecutive tool messages following this assistant
let mut tool_count = 0;
while i + 1 + tool_count < messages.len()
&& messages[i + 1 + tool_count].role == "tool"
&& !protected[i + 1 + tool_count]
{
tool_count += 1;
}
if tool_count > 0 {
let summary =
format!("[Tool exchange: {tool_count} tool call(s) — results collapsed]");
messages[i] = ChatMessage {
role: "assistant".to_string(),
content: summary,
};
for _ in 0..tool_count {
messages.remove(i + 1);
}
collapsed_pairs += tool_count;
continue;
}
}
i += 1;
}
}

// Phase 2 – budget enforcement
// Phase 2 – budget enforcement: drop messages to fit token budget.
// Tool groups (assistant + consecutive tool messages) are dropped
// atomically to preserve tool_use/tool_result pairing. See #4810.
let mut dropped_messages: usize = 0;
while estimate_tokens(messages) > config.max_tokens {
let protected = protected_indices(messages, config.keep_recent);
if let Some(idx) = protected
.iter()
.enumerate()
.find(|&(_, &p)| !p)
.map(|(i, _)| i)
{
messages.remove(idx);
let mut dropped_any = false;
let mut i = 0;
while i < messages.len() {
if protected[i] {
i += 1;
continue;
}
if messages[i].role == "assistant" {
// Count following tool messages — drop as atomic group
let mut tool_count = 0;
while i + 1 + tool_count < messages.len()
&& messages[i + 1 + tool_count].role == "tool"
{
tool_count += 1;
}
if tool_count > 0 {
for _ in 0..=tool_count {
messages.remove(i);
}
dropped_messages += 1 + tool_count;
dropped_any = true;
break;
}
}
// Non-tool-group message — safe to drop individually
messages.remove(i);
dropped_messages += 1;
} else {
dropped_any = true;
break;
}
if !dropped_any {
break;
}
}
Expand Down Expand Up @@ -220,7 +255,7 @@ mod tests {
assert_eq!(stats.collapsed_pairs, 1);
assert_eq!(messages.len(), 4);
assert_eq!(messages[1].role, "assistant");
assert!(messages[1].content.starts_with("[Tool result: "));
assert!(messages[1].content.contains("1 tool call(s)"));
}

#[test]
Expand Down Expand Up @@ -280,4 +315,164 @@ mod tests {
assert_eq!(stats.messages_before, 0);
assert_eq!(stats.messages_after, 0);
}

#[test]
fn prune_collapses_multi_tool_group() {
let mut messages = vec![
msg("system", "sys"),
msg(
"assistant",
r#"{"content":null,"tool_calls":[{"id":"t1","name":"shell","arguments":"{}"},{"id":"t2","name":"web","arguments":"{}"}]}"#,
),
msg("tool", r#"{"tool_call_id":"t1","content":"result1"}"#),
msg("tool", r#"{"tool_call_id":"t2","content":"result2"}"#),
msg("user", "thanks"),
msg("assistant", "done"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100_000,
keep_recent: 2,
collapse_tool_results: true,
};
let stats = prune_history(&mut messages, &config);
assert_eq!(stats.collapsed_pairs, 2);
// assistant(tool_calls) + 2 tool messages → 1 summary assistant
assert_eq!(messages.len(), 4); // sys, summary, user, assistant
assert!(messages[1].content.contains("2 tool call(s)"));
// No tool messages remain
assert!(!messages.iter().any(|m| m.role == "tool"));
}

#[test]
fn prune_drops_tool_group_atomically() {
let big = "x".repeat(2000);
let mut messages = vec![
msg("system", "sys"),
msg("assistant", &big),
msg("tool", &big),
msg("tool", &big),
msg("user", "recent"),
msg("assistant", "recent reply"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 50, // very low — forces drops
keep_recent: 2,
collapse_tool_results: false, // skip collapse, go straight to drop
};
let stats = prune_history(&mut messages, &config);
assert!(stats.dropped_messages >= 3); // assistant + 2 tools dropped together
// No orphaned tool messages
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"tool message at index {i} has no preceding assistant"
);
}
}
}

#[test]
fn prune_never_orphans_tool_use() {
// Simulate a conversation with multiple tool groups
let filler = "y".repeat(500);
let mut messages = vec![
msg("system", "sys"),
msg("user", "q1"),
msg("assistant", &filler), // tool group 1
msg("tool", &filler),
msg("user", "q2"),
msg("assistant", &filler), // tool group 2
msg("tool", &filler),
msg("tool", &filler),
msg("user", "recent"),
msg("assistant", "recent reply"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100,
keep_recent: 2,
collapse_tool_results: true,
};
prune_history(&mut messages, &config);
// Verify invariant: no tool message without a preceding assistant
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"orphaned tool message at index {i}: {:?}",
messages.iter().map(|m| &m.role).collect::<Vec<_>>()
);
}
}
}

#[test]
fn prune_protects_recent_tool_groups() {
let mut messages = vec![
msg("system", "sys"),
msg("user", "old"),
msg("assistant", "old reply"),
msg("assistant", "tool call"),
msg("tool", "tool result"),
msg("user", "recent"),
];
let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 100_000,
keep_recent: 3, // protects last 3: tool call, tool result, recent
collapse_tool_results: true,
};
let stats = prune_history(&mut messages, &config);
// Protected tool group should not be collapsed
assert!(messages.iter().any(|m| m.role == "tool"));
assert_eq!(stats.collapsed_pairs, 0);
}

#[test]
fn prune_under_realistic_token_pressure_preserves_tool_pairing() {
// Simulate 15 tool iterations with realistic content sizes
let mut messages = vec![msg("system", "You are helpful.")];
messages.push(msg("user", "Research this topic thoroughly"));

// 15 tool iterations — each adds assistant(tool_calls) + tool(result)
for i in 0..15 {
let tool_json = format!(
r#"{{"content":"iteration {i}","tool_calls":[{{"id":"t{i}","name":"web_search","arguments":"{{}}"}}]}}"#
);
messages.push(msg("assistant", &tool_json));
// Realistic tool result size (~2K chars each)
let result = format!(
r#"{{"tool_call_id":"t{i}","content":"{}"}}"#,
"x".repeat(2000)
);
messages.push(msg("tool", &result));
}
messages.push(msg("assistant", "Here's what I found..."));

// 33 messages total: system + user + 15*(assistant+tool) + final assistant
assert_eq!(messages.len(), 33);

let config = HistoryPrunerConfig {
enabled: true,
max_tokens: 2000, // Forces pruning of older iterations
keep_recent: 4,
collapse_tool_results: true,
};

prune_history(&mut messages, &config);

// Invariant: no orphaned tool messages after pruning
for (i, m) in messages.iter().enumerate() {
if m.role == "tool" {
assert!(
i > 0 && messages[i - 1].role == "assistant",
"orphaned tool at index {i}: roles = {:?}",
messages.iter().map(|m| &m.role).collect::<Vec<_>>()
);
}
}
}
}
Loading