Skip to content

Commit 1776b6d

Browse files
committed
fix(agent,channels): receipts integration test + drop duplicate footer surface
Addresses @WareWolf-MoonWall's review on zeroclaw-labs#6214 (29a779b). 🔴 Add `process_channel_message_renders_trailing_tool_receipts_block_when_enabled` in `crates/zeroclaw-channels/src/orchestrator/mod.rs::tests` that runs the full activated path (Some<ReceiptGenerator> + show_receipts_in_response=true) and asserts the mock channel receives a second send carrying the documented `---\nTool receipts:` separator and a valid `zc-receipt-*` token tied to the tool name. Plus a paired `_omits_receipts_block_when_disabled` test asserting the toggle actually gates the second send. These tests required `AutonomyLevel::Full` + an explicit `auto_approve.push("mock_price")` so mock_price actually reaches `execute_one_tool` — the existing `process_channel_message_*` tests in this file pass under default Supervised because `ToolCallingProvider` returns the BTC reply regardless of whether the tool ran (the LLM only needs to see a `[Tool results]` user message — even a denied/cancelled payload triggers the canned response). Receipts only generate on the actual execute path, so the gate has to be open here. Drop `append_receipt_footer` from `crates/zeroclaw-runtime/src/agent/loop_.rs` and its two call sites (the `_omits_receipts_block_when_disabled` test exposed the bug that surfaced it). Pre-fix, the footer mechanism appended a `---\nTool receipts:\n...` block to the agent's response text *regardless* of `show_receipts_in_response`, while the orchestrator *also* sent the same content as a separate channel message *gated* on the toggle — so: show_receipts_in_response = true → footer + separate block (duplicate) show_receipts_in_response = false → footer only (toggle ignored) Both modes were wrong. The orchestrator's separate-message render is the design Wolf's review expects (and matches the original PR body), so keep that path and remove the footer entirely. The `[receipt: ...]` markers on individual tool results in history are *kept* — those are how the LLM echoes receipts when the system-prompt addendum is active. Their corresponding tests live in `agent::tool_receipts::tests` and are unaffected. 🔵 Replace `let _ = channel.send(...)` for the receipts block with a `tracing::warn!` on `Err(e)`. The block is the operator-facing audit surface for the feature; a dropped send must leave a log signal rather than silently disappear. 🔵 Make the `collected_receipts` arg to `run_tool_call_loop` conditional on `ctx.receipt_generator.is_some()` instead of unconditional `Some(&...)`. Inside the loop the collector is only written to when the generator is also Some, so behaviour is unchanged — the explicit `.map(|_| ...)` just makes the "collector is meaningful only when generator is active" relationship visible at the call site.
1 parent 29a779b commit 1776b6d

2 files changed

Lines changed: 266 additions & 77 deletions

File tree

  • crates
    • zeroclaw-channels/src/orchestrator
    • zeroclaw-runtime/src/agent

crates/zeroclaw-channels/src/orchestrator/mod.rs

Lines changed: 264 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3190,7 +3190,12 @@ async fn process_channel_message(
31903190
None, // shared_budget
31913191
target_channel.as_deref(),
31923192
ctx.receipt_generator.as_ref(),
3193-
Some(&tool_receipts_collector),
3193+
// Collector is meaningful only when the generator is
3194+
// active. Pass None when receipts are disabled so the
3195+
// call site reflects that coupling explicitly.
3196+
ctx.receipt_generator
3197+
.as_ref()
3198+
.map(|_| &tool_receipts_collector),
31943199
),
31953200
),
31963201
),
@@ -3517,13 +3522,22 @@ async fn process_channel_message(
35173522
eprintln!(" ❌ Failed to reply on {}: {e}", channel.name());
35183523
}
35193524
// Send tool receipts as a separate message in the same thread.
3520-
if let Some(ref block) = receipts_block {
3521-
let _ = channel
3525+
// The block is the operator-facing audit surface for the feature,
3526+
// so a dropped send must leave a log signal rather than silently
3527+
// disappear.
3528+
if let Some(ref block) = receipts_block
3529+
&& let Err(e) = channel
35223530
.send(
35233531
&SendMessage::new(block, &msg.reply_target)
35243532
.in_thread(msg.thread_ts.clone()),
35253533
)
3526-
.await;
3534+
.await
3535+
{
3536+
tracing::warn!(
3537+
channel = channel.name(),
3538+
error = %e,
3539+
"failed to send tool receipts block"
3540+
);
35273541
}
35283542
}
35293543
}
@@ -7344,6 +7358,252 @@ BTC is currently around $65,000 based on latest tool output."#
73447358
assert!(!reply.contains("mock_price"));
73457359
}
73467360

7361+
#[tokio::test]
7362+
async fn process_channel_message_renders_trailing_tool_receipts_block_when_enabled() {
7363+
// Activated path: a real ReceiptGenerator + show_receipts_in_response=true
7364+
// must produce a second send carrying the "Tool receipts:" block with a
7365+
// valid zc-receipt-* token. Pre-#6214 this was dead code from the test
7366+
// suite because every ChannelRuntimeContext literal pinned the feature
7367+
// off; this test guards the integration so a regression in the block
7368+
// render or send call surfaces in CI rather than in production.
7369+
let channel_impl = Arc::new(RecordingChannel::default());
7370+
let channel: Arc<dyn Channel> = channel_impl.clone();
7371+
7372+
let mut channels_by_name = HashMap::new();
7373+
channels_by_name.insert(channel.name().to_string(), channel);
7374+
7375+
let runtime_ctx = Arc::new(ChannelRuntimeContext {
7376+
channels_by_name: Arc::new(channels_by_name),
7377+
provider: Arc::new(ToolCallingProvider),
7378+
default_provider: Arc::new("test-provider".to_string()),
7379+
memory: Arc::new(NoopMemory),
7380+
tools_registry: Arc::new(vec![Box::new(MockPriceTool)]),
7381+
observer: Arc::new(NoopObserver),
7382+
system_prompt: Arc::new("test-system-prompt".to_string()),
7383+
model: Arc::new("test-model".to_string()),
7384+
temperature: 0.0,
7385+
auto_save_memory: false,
7386+
max_tool_iterations: 10,
7387+
min_relevance_score: 0.0,
7388+
conversation_histories: Arc::new(Mutex::new(lru::LruCache::new(
7389+
std::num::NonZeroUsize::new(MAX_CONVERSATION_SENDERS).unwrap(),
7390+
))),
7391+
pending_new_sessions: Arc::new(Mutex::new(HashSet::new())),
7392+
provider_cache: Arc::new(Mutex::new(HashMap::new())),
7393+
route_overrides: Arc::new(Mutex::new(HashMap::new())),
7394+
api_key: None,
7395+
api_url: None,
7396+
reliability: Arc::new(zeroclaw_config::schema::ReliabilityConfig::default()),
7397+
provider_runtime_options: zeroclaw_providers::ProviderRuntimeOptions::default(),
7398+
workspace_dir: Arc::new(std::env::temp_dir()),
7399+
prompt_config: Arc::new(zeroclaw_config::schema::Config::default()),
7400+
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
7401+
interrupt_on_new_message: InterruptOnNewMessageConfig {
7402+
telegram: false,
7403+
slack: false,
7404+
discord: false,
7405+
mattermost: false,
7406+
matrix: false,
7407+
},
7408+
non_cli_excluded_tools: Arc::new(Vec::new()),
7409+
// Full autonomy + auto-approve mock_price so the loop actually
7410+
// reaches execute_one_tool. The other tests in this file pass
7411+
// under Supervised because ToolCallingProvider returns the BTC
7412+
// reply regardless of whether the tool ran (the LLM only needs
7413+
// to see a `[Tool results]` user message — even a "denied"
7414+
// payload triggers the deterministic response). Receipts only
7415+
// generate on the actual execute path, so we need the gate
7416+
// open here.
7417+
autonomy_level: AutonomyLevel::Full,
7418+
tool_call_dedup_exempt: Arc::new(Vec::new()),
7419+
multimodal: zeroclaw_config::schema::MultimodalConfig::default(),
7420+
media_pipeline: zeroclaw_config::schema::MediaPipelineConfig::default(),
7421+
transcription_config: zeroclaw_config::schema::TranscriptionConfig::default(),
7422+
hooks: None,
7423+
model_routes: Arc::new(Vec::new()),
7424+
query_classification: zeroclaw_config::schema::QueryClassificationConfig::default(),
7425+
ack_reactions: true,
7426+
show_tool_calls: true,
7427+
session_store: None,
7428+
approval_manager: Arc::new(ApprovalManager::for_non_interactive(&{
7429+
let mut autonomy = zeroclaw_config::schema::AutonomyConfig::default();
7430+
autonomy.level = zeroclaw_config::autonomy::AutonomyLevel::Full;
7431+
autonomy.auto_approve.push("mock_price".to_string());
7432+
autonomy
7433+
})),
7434+
activated_tools: None,
7435+
cost_tracking: None,
7436+
pacing: zeroclaw_config::schema::PacingConfig::default(),
7437+
max_tool_result_chars: 0,
7438+
context_token_budget: 0,
7439+
debouncer: Arc::new(zeroclaw_infra::debounce::MessageDebouncer::new(
7440+
Duration::ZERO,
7441+
)),
7442+
receipt_generator: Some(
7443+
zeroclaw_runtime::agent::tool_receipts::ReceiptGenerator::new(),
7444+
),
7445+
show_receipts_in_response: true,
7446+
});
7447+
7448+
process_channel_message(
7449+
runtime_ctx,
7450+
zeroclaw_api::channel::ChannelMessage {
7451+
id: "msg-1".to_string(),
7452+
sender: "alice".to_string(),
7453+
reply_target: "chat-42".to_string(),
7454+
content: "What is the BTC price now?".to_string(),
7455+
channel: "test-channel".to_string(),
7456+
timestamp: 1,
7457+
thread_ts: None,
7458+
interruption_scope_id: None,
7459+
attachments: vec![],
7460+
},
7461+
CancellationToken::new(),
7462+
)
7463+
.await;
7464+
7465+
let sent_messages = channel_impl.sent_messages.lock().await;
7466+
// Two sends: the model's reply and the trailing receipts block.
7467+
assert!(
7468+
sent_messages.len() >= 2,
7469+
"expected at least 2 sends (reply + receipts block), got {}: {:?}",
7470+
sent_messages.len(),
7471+
sent_messages
7472+
);
7473+
7474+
let receipts_message = sent_messages
7475+
.iter()
7476+
.find(|m| m.contains("Tool receipts:"))
7477+
.unwrap_or_else(|| {
7478+
panic!(
7479+
"no `Tool receipts:` send found; got {:?}",
7480+
sent_messages.as_slice()
7481+
)
7482+
});
7483+
assert!(
7484+
receipts_message.starts_with("chat-42:"),
7485+
"receipts block must be sent to the same reply target as the agent reply, got {receipts_message}"
7486+
);
7487+
assert!(
7488+
receipts_message.contains("---\nTool receipts:"),
7489+
"receipts block must be prefixed with the documented `---\\nTool receipts:` separator, got {receipts_message}"
7490+
);
7491+
assert!(
7492+
receipts_message.contains("zc-receipt-"),
7493+
"receipts block must carry at least one zc-receipt-* HMAC token (proves the generator actually ran), got {receipts_message}"
7494+
);
7495+
assert!(
7496+
receipts_message.contains("mock_price"),
7497+
"receipts block should name the tool that produced the receipt, got {receipts_message}"
7498+
);
7499+
}
7500+
7501+
#[tokio::test]
7502+
async fn process_channel_message_omits_receipts_block_when_disabled() {
7503+
// Backward-compat: with show_receipts_in_response=false (default), no
7504+
// trailing receipts message is sent — even when a generator is active
7505+
// and the loop ran tools. This is the path every other test relies on
7506+
// implicitly; assert it once explicitly.
7507+
let channel_impl = Arc::new(RecordingChannel::default());
7508+
let channel: Arc<dyn Channel> = channel_impl.clone();
7509+
7510+
let mut channels_by_name = HashMap::new();
7511+
channels_by_name.insert(channel.name().to_string(), channel);
7512+
7513+
let runtime_ctx = Arc::new(ChannelRuntimeContext {
7514+
channels_by_name: Arc::new(channels_by_name),
7515+
provider: Arc::new(ToolCallingProvider),
7516+
default_provider: Arc::new("test-provider".to_string()),
7517+
memory: Arc::new(NoopMemory),
7518+
tools_registry: Arc::new(vec![Box::new(MockPriceTool)]),
7519+
observer: Arc::new(NoopObserver),
7520+
system_prompt: Arc::new("test-system-prompt".to_string()),
7521+
model: Arc::new("test-model".to_string()),
7522+
temperature: 0.0,
7523+
auto_save_memory: false,
7524+
max_tool_iterations: 10,
7525+
min_relevance_score: 0.0,
7526+
conversation_histories: Arc::new(Mutex::new(lru::LruCache::new(
7527+
std::num::NonZeroUsize::new(MAX_CONVERSATION_SENDERS).unwrap(),
7528+
))),
7529+
pending_new_sessions: Arc::new(Mutex::new(HashSet::new())),
7530+
provider_cache: Arc::new(Mutex::new(HashMap::new())),
7531+
route_overrides: Arc::new(Mutex::new(HashMap::new())),
7532+
api_key: None,
7533+
api_url: None,
7534+
reliability: Arc::new(zeroclaw_config::schema::ReliabilityConfig::default()),
7535+
provider_runtime_options: zeroclaw_providers::ProviderRuntimeOptions::default(),
7536+
workspace_dir: Arc::new(std::env::temp_dir()),
7537+
prompt_config: Arc::new(zeroclaw_config::schema::Config::default()),
7538+
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
7539+
interrupt_on_new_message: InterruptOnNewMessageConfig {
7540+
telegram: false,
7541+
slack: false,
7542+
discord: false,
7543+
mattermost: false,
7544+
matrix: false,
7545+
},
7546+
non_cli_excluded_tools: Arc::new(Vec::new()),
7547+
// Match the enabled-test setup so the tool actually runs; the
7548+
// assertion below proves the receipt-block send is gated on
7549+
// `show_receipts_in_response` and not on whether the loop saw
7550+
// any receipts.
7551+
autonomy_level: AutonomyLevel::Full,
7552+
tool_call_dedup_exempt: Arc::new(Vec::new()),
7553+
multimodal: zeroclaw_config::schema::MultimodalConfig::default(),
7554+
media_pipeline: zeroclaw_config::schema::MediaPipelineConfig::default(),
7555+
transcription_config: zeroclaw_config::schema::TranscriptionConfig::default(),
7556+
hooks: None,
7557+
model_routes: Arc::new(Vec::new()),
7558+
query_classification: zeroclaw_config::schema::QueryClassificationConfig::default(),
7559+
ack_reactions: true,
7560+
show_tool_calls: true,
7561+
session_store: None,
7562+
approval_manager: Arc::new(ApprovalManager::for_non_interactive(&{
7563+
let mut autonomy = zeroclaw_config::schema::AutonomyConfig::default();
7564+
autonomy.level = zeroclaw_config::autonomy::AutonomyLevel::Full;
7565+
autonomy.auto_approve.push("mock_price".to_string());
7566+
autonomy
7567+
})),
7568+
activated_tools: None,
7569+
cost_tracking: None,
7570+
pacing: zeroclaw_config::schema::PacingConfig::default(),
7571+
max_tool_result_chars: 0,
7572+
context_token_budget: 0,
7573+
debouncer: Arc::new(zeroclaw_infra::debounce::MessageDebouncer::new(
7574+
Duration::ZERO,
7575+
)),
7576+
receipt_generator: Some(
7577+
zeroclaw_runtime::agent::tool_receipts::ReceiptGenerator::new(),
7578+
),
7579+
show_receipts_in_response: false,
7580+
});
7581+
7582+
process_channel_message(
7583+
runtime_ctx,
7584+
zeroclaw_api::channel::ChannelMessage {
7585+
id: "msg-1".to_string(),
7586+
sender: "alice".to_string(),
7587+
reply_target: "chat-42".to_string(),
7588+
content: "What is the BTC price now?".to_string(),
7589+
channel: "test-channel".to_string(),
7590+
timestamp: 1,
7591+
thread_ts: None,
7592+
interruption_scope_id: None,
7593+
attachments: vec![],
7594+
},
7595+
CancellationToken::new(),
7596+
)
7597+
.await;
7598+
7599+
let sent_messages = channel_impl.sent_messages.lock().await;
7600+
assert!(
7601+
!sent_messages.iter().any(|m| m.contains("Tool receipts:")),
7602+
"no receipts block must be sent when show_receipts_in_response=false; got {:?}",
7603+
sent_messages.as_slice()
7604+
);
7605+
}
7606+
73477607
#[tokio::test]
73487608
async fn process_channel_message_telegram_does_not_persist_tool_summary_prefix() {
73497609
let channel_impl = Arc::new(TelegramRecordingChannel::default());

crates/zeroclaw-runtime/src/agent/loop_.rs

Lines changed: 2 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -802,31 +802,6 @@ fn maybe_inject_channel_delivery_defaults(
802802
// • the cancellation token fires (external abort).
803803

804804
/// Append a receipt footer to the response text if any receipts were collected.
805-
///
806-
/// Format:
807-
/// ```text
808-
/// \n\n---\nTool receipts:\n shell: zc-receipt-...\n web_search: zc-receipt-...
809-
/// ```
810-
pub fn append_receipt_footer(
811-
response: String,
812-
collected_receipts: Option<&std::sync::Mutex<Vec<String>>>,
813-
) -> String {
814-
let Some(store) = collected_receipts else {
815-
return response;
816-
};
817-
let Ok(receipts) = store.lock() else {
818-
return response;
819-
};
820-
if receipts.is_empty() {
821-
return response;
822-
}
823-
let mut footer = format!("{response}\n\n---\nTool receipts:");
824-
for entry in receipts.iter() {
825-
footer.push_str(&format!("\n {entry}"));
826-
}
827-
footer
828-
}
829-
830805
/// Execute a single turn of the agent loop: send messages, parse tool calls,
831806
/// execute tools, and loop until the LLM produces a final text response.
832807
#[allow(clippy::too_many_arguments)]
@@ -1466,10 +1441,7 @@ pub async fn run_tool_call_loop(
14661441
}
14671442

14681443
history.push(ChatMessage::assistant(response_text.clone()));
1469-
return Ok(append_receipt_footer(
1470-
accumulated_display_text,
1471-
collected_receipts,
1472-
));
1444+
return Ok(accumulated_display_text);
14731445
}
14741446

14751447
// Accumulate text from this iteration (tool calls present, loop continues).
@@ -2023,10 +1995,7 @@ pub async fn run_tool_call_loop(
20231995
anyhow::bail!("Agent exceeded maximum tool iterations ({max_iterations})")
20241996
}
20251997
accumulated_display_text.push_str(&text);
2026-
Ok(append_receipt_footer(
2027-
accumulated_display_text,
2028-
collected_receipts,
2029-
))
1998+
Ok(accumulated_display_text)
20301999
}
20312000
Err(e) => {
20322001
tracing::warn!(error = %e, "Final summary LLM call failed, bailing");
@@ -7522,44 +7491,4 @@ Let me check the result."#;
75227491

75237492
assert_eq!(result, "ok");
75247493
}
7525-
7526-
// ── append_receipt_footer tests ──────────────────────────────
7527-
7528-
#[test]
7529-
fn receipt_footer_empty_receipts_unchanged() {
7530-
let store = std::sync::Mutex::new(Vec::<String>::new());
7531-
let result = super::append_receipt_footer("Hello world".to_string(), Some(&store));
7532-
assert_eq!(result, "Hello world");
7533-
}
7534-
7535-
#[test]
7536-
fn receipt_footer_none_store_unchanged() {
7537-
let result = super::append_receipt_footer("Hello world".to_string(), None);
7538-
assert_eq!(result, "Hello world");
7539-
}
7540-
7541-
#[test]
7542-
fn receipt_footer_single_receipt() {
7543-
let store = std::sync::Mutex::new(vec!["shell: zc-receipt-1234567890-abcdef".to_string()]);
7544-
let result = super::append_receipt_footer("The date is Monday.".to_string(), Some(&store));
7545-
assert_eq!(
7546-
result,
7547-
"The date is Monday.\n\n---\nTool receipts:\n shell: zc-receipt-1234567890-abcdef"
7548-
);
7549-
}
7550-
7551-
#[test]
7552-
fn receipt_footer_multiple_receipts() {
7553-
let store = std::sync::Mutex::new(vec![
7554-
"shell: zc-receipt-100-aaa".to_string(),
7555-
"web_search: zc-receipt-200-bbb".to_string(),
7556-
"file_read: zc-receipt-300-ccc".to_string(),
7557-
]);
7558-
let result = super::append_receipt_footer("Done.".to_string(), Some(&store));
7559-
let expected = "Done.\n\n---\nTool receipts:\
7560-
\n shell: zc-receipt-100-aaa\
7561-
\n web_search: zc-receipt-200-bbb\
7562-
\n file_read: zc-receipt-300-ccc";
7563-
assert_eq!(result, expected);
7564-
}
75657494
}

0 commit comments

Comments
 (0)