Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion codex-rs/app-server/tests/suite/v2/mcp_server_elicitation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_config::types::AuthCredentialsStoreMode;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use pretty_assertions::assert_eq;
use rmcp::handler::server::ServerHandler;
Expand Down Expand Up @@ -274,8 +275,15 @@ async fn mcp_server_elicitation_round_trip() -> Result<()> {
.get("output")
.and_then(Value::as_str)
.expect("function_call_output output should be a JSON string");
let payload = assert_regex_match(
r#"(?s)^Wall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n(.*)$"#,
output,
)
.get(1)
.expect("wall-time wrapped output should include payload")
.as_str();
assert_eq!(
serde_json::from_str::<Value>(output)?,
serde_json::from_str::<Value>(payload)?,
json!([{
"type": "text",
"text": "accepted"
Expand Down
57 changes: 57 additions & 0 deletions codex-rs/core/src/tools/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,63 @@ impl ToolOutput for CallToolResult {
}
}

#[derive(Clone, Debug)]
pub struct McpToolOutput {
pub result: CallToolResult,
pub wall_time: Duration,
}

impl ToolOutput for McpToolOutput {
fn log_preview(&self) -> String {
let payload = self.response_payload();
let preview = payload.body.to_text().unwrap_or_else(|| {
serde_json::to_string(&self.result.content)
.unwrap_or_else(|err| format!("failed to serialize mcp result: {err}"))
});
telemetry_preview(&preview)
}

fn success_for_logging(&self) -> bool {
self.result.success()
}

fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: self.response_payload(),
}
}

fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue {
serde_json::to_value(&self.result).unwrap_or_else(|err| {
JsonValue::String(format!("failed to serialize mcp result: {err}"))
})
}
}

impl McpToolOutput {
fn response_payload(&self) -> FunctionCallOutputPayload {
let mut payload = self.result.as_function_call_output_payload();
let wall_time_seconds = self.wall_time.as_secs_f64();
let header = format!("Wall time: {wall_time_seconds:.4} seconds\nOutput:");

match &mut payload.body {
FunctionCallOutputBody::Text(text) => {
if text.is_empty() {
*text = header;
} else {
*text = format!("{header}\n{text}");
}
}
FunctionCallOutputBody::ContentItems(items) => {
items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
}
}

payload
}
}

#[derive(Clone)]
pub struct ToolSearchOutput {
pub tools: Vec<ToolSearchOutputTool>,
Expand Down
139 changes: 139 additions & 0 deletions codex-rs/core/src/tools/context_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,145 @@ fn mcp_code_mode_result_serializes_full_call_tool_result() {
);
}

#[test]
fn mcp_tool_output_response_item_includes_wall_time() {
let output = McpToolOutput {
result: CallToolResult {
content: vec![serde_json::json!({
"type": "text",
"text": "done",
})],
structured_content: None,
is_error: Some(false),
meta: None,
},
wall_time: std::time::Duration::from_millis(1250),
};

let response = output.to_response_item(
"mcp-call-1",
&ToolPayload::Mcp {
server: "server".to_string(),
tool: "tool".to_string(),
raw_arguments: "{}".to_string(),
},
);

match response {
ResponseInputItem::FunctionCallOutput { call_id, output } => {
assert_eq!(call_id, "mcp-call-1");
assert_eq!(output.success, Some(true));
let Some(text) = output.body.to_text() else {
panic!("MCP output should serialize as text");
};
let Some(payload) = text.strip_prefix("Wall time: 1.2500 seconds\nOutput:\n") else {
panic!("MCP output should include wall-time header: {text}");
};
let parsed: serde_json::Value = serde_json::from_str(payload).unwrap_or_else(|err| {
panic!("MCP output should serialize JSON content: {err}");
});
assert_eq!(
parsed,
json!([{
"type": "text",
"text": "done",
}])
);
}
other => panic!("expected FunctionCallOutput, got {other:?}"),
}
}

#[test]
fn mcp_tool_output_response_item_preserves_content_items() {
let image_url = "data:image/png;base64,AAA";
let output = McpToolOutput {
result: CallToolResult {
content: vec![serde_json::json!({
"type": "image",
"mimeType": "image/png",
"data": "AAA",
})],
structured_content: None,
is_error: Some(false),
meta: None,
},
wall_time: std::time::Duration::from_millis(500),
};

let response = output.to_response_item(
"mcp-call-2",
&ToolPayload::Mcp {
server: "server".to_string(),
tool: "tool".to_string(),
raw_arguments: "{}".to_string(),
},
);

match response {
ResponseInputItem::FunctionCallOutput { output, .. } => {
assert_eq!(
output.content_items(),
Some(
vec![
FunctionCallOutputContentItem::InputText {
text: "Wall time: 0.5000 seconds\nOutput:".to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: image_url.to_string(),
detail: None,
},
]
.as_slice()
)
);
assert_eq!(
output.body.to_text().as_deref(),
Some("Wall time: 0.5000 seconds\nOutput:")
);
}
other => panic!("expected FunctionCallOutput, got {other:?}"),
}
}

#[test]
fn mcp_tool_output_code_mode_result_stays_raw_call_tool_result() {
let output = McpToolOutput {
result: CallToolResult {
content: vec![serde_json::json!({
"type": "text",
"text": "ignored",
})],
structured_content: Some(serde_json::json!({
"content": "done",
})),
is_error: Some(false),
meta: None,
},
wall_time: std::time::Duration::from_millis(1250),
};

let result = output.code_mode_result(&ToolPayload::Mcp {
server: "server".to_string(),
tool: "tool".to_string(),
raw_arguments: "{}".to_string(),
});

assert_eq!(
result,
serde_json::json!({
"content": [{
"type": "text",
"text": "ignored",
}],
"structuredContent": {
"content": "done",
},
"isError": false,
})
);
}

#[test]
fn custom_tool_calls_can_derive_text_from_content_items() {
let payload = ToolPayload::Custom {
Expand Down
13 changes: 9 additions & 4 deletions codex-rs/core/src/tools/handlers/mcp.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
use std::sync::Arc;
use std::time::Instant;

use crate::function_tool::FunctionCallError;
use crate::mcp_tool_call::handle_mcp_tool_call;
use crate::tools::context::McpToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::mcp::CallToolResult;

pub struct McpHandler;
impl ToolHandler for McpHandler {
type Output = CallToolResult;
type Output = McpToolOutput;

fn kind(&self) -> ToolKind {
ToolKind::Mcp
Expand Down Expand Up @@ -41,7 +42,8 @@ impl ToolHandler for McpHandler {
let (server, tool, raw_arguments) = payload;
let arguments_str = raw_arguments;

let output = handle_mcp_tool_call(
let started = Instant::now();
let result = handle_mcp_tool_call(
Arc::clone(&session),
&turn,
call_id.clone(),
Expand All @@ -51,6 +53,9 @@ impl ToolHandler for McpHandler {
)
.await;

Ok(output)
Ok(McpToolOutput {
result,
wall_time: started.elapsed(),
})
}
}
61 changes: 52 additions & 9 deletions codex-rs/core/tests/suite/rmcp_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use codex_utils_cargo_bin::cargo_bin;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_once;
Expand All @@ -51,6 +52,29 @@ use tokio::time::sleep;

static OPENAI_PNG: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAD0AAAA9CAYAAAAeYmHpAAAE6klEQVR4Aeyau44UVxCGx1fZsmRLlm3Zoe0XcGQ5cUiCCIgJeS9CHgAhMkISQnIuGQgJEkBcxLW+nqnZ6uqqc+nuWRC7q/P3qetf9e+MtOwyX25O4Nep6JPyop++0qev9HrfgZ+F6r2DuB/vHOrt/UIkqdDHYvujOW6fO7h/CNEI+a5jc+pBR8uy0jVFsziYu5HtfSUk+Io34q921hLNctFSX0gwww+S8wce8K1LfCU+cYW4888aov8NxqvQILUPPReLOrm6zyLxa4i+6VZuFbJo8d1MOHZm+7VUtB/aIvhPWc/3SWg49JcwFLlHxuXKjtyloo+YNhuW3VS+WPBuUEMvCFKjEDVgFBQHXrnazpqiSxNZCkQ1kYiozsbm9Oz7l4i2Il7vGccGNWAc3XosDrZe/9P3ZnMmzHNEQw4smf8RQ87XEAMsC7Az0Au+dgXerfH4+sHvEc0SYGic8WBBUGqFH2gN7yDrazy7m2pbRTeRmU3+MjZmr1h6LJgPbGy23SI6GlYT0brQ71IY8Us4PNQCm+zepSbaD2BY9xCaAsD9IIj/IzFmKMSdHHonwdZATbTnYREf6/VZGER98N9yCWIvXQwXDoDdhZJoT8jwLnJXDB9w4Sb3e6nK5ndzlkTLnP3JBu4LKkbrYrU69gCVceV0JvpyuW1xlsUVngzhwMetn/XamtTORF9IO5YnWNiyeF9zCAfqR3fUW+vZZKLtgP+ts8BmQRBREAdRDhH3o8QuRh/YucNFz2BEjxbRN6LGzphfKmvP6v6QhqIQyZ8XNJ0W0X83MR1PEcJBNO2KC2Z1TW/v244scp9FwRViZxIOBF0Lctk7ZVSavdLvRlV1hz/ysUi9sr8CIcB3nvWBwA93ykTz18eAYxQ6N/K2DkPA1lv3iXCwmDUT7YkjIby9siXueIJj9H+pzSqJ9oIuJWTUgSSt4WO7o/9GGg0viR4VinNRUDoIj34xoCd6pxD3aK3zfdbnx5v1J3ZNNEJsE0sBG7N27ReDrJc4sFxz7dI/ZAbOmmiKvHBitQXpAdR6+F7v+/ol/tOouUV01EeMZQF2BoQDn6dP4XNr+j9GZEtEK1/L8pFw7bd3a53tsTa7WD+054jOFmPg1XBKPQgnqFfmFcy32ZRvjmiIIQTYFvyDxQ8nH8WIwwGwlyDjDznnilYyFr6njrlZwsKkBpO59A7OwgdzPEWRm+G+oeb7IfyNuzjEEVLrOVxJsxvxwF8kmCM6I2QYmJunz4u4TrADpfl7mlbRTWQ7VmrBzh3+C9f6Grc3YoGN9dg/SXFthpRsT6vobfXRs2VBlgBHXVMLHjDNbIZv1sZ9+X3hB09cXdH1JKViyG0+W9bWZDa/r2f9zAFR71sTzGpMSWz2iI4YssWjWo3REy1MDGjdwe5e0dFSiAC1JakBvu4/CUS8Eh6dqHdU0Or0ioY3W5ClSqDXAy7/6SRfgw8vt4I+tbvvNtFT2kVDhY5+IGb1rCqYaXNF08vSALsXCPmt0kQNqJT1p5eI1mkIV/BxCY1z85lOzeFbPBQHURkkPTlwTYK9gTVE25l84IbFFN+YJDHjdpn0gq6mrHht0dkcjbM4UL9283O5p77GN+SPW/QwVB4IUYg7Or+Kp7naR6qktP98LNF2UxWo9yObPIT9KYg+hK4i56no4rfnM0qeyFf6AwAAAP//trwR3wAAAAZJREFUAwBZ0sR75itw5gAAAABJRU5ErkJggg==";

fn assert_wall_time_line(line: &str) {
assert_regex_match(r"^Wall time: [0-9]+(?:\.[0-9]+)? seconds$", line);
}

fn split_wall_time_wrapped_output(output: &str) -> &str {
let Some((wall_time, rest)) = output.split_once('\n') else {
panic!("wall-time output should contain an Output section: {output}");
};
assert_wall_time_line(wall_time);
let Some(output) = rest.strip_prefix("Output:\n") else {
panic!("wall-time output should contain Output marker: {output}");
};
output
}

fn assert_wall_time_header(output: &str) {
let Some((wall_time, marker)) = output.split_once('\n') else {
panic!("wall-time header should contain an Output marker: {output}");
};
assert_wall_time_line(wall_time);
assert_eq!(marker, "Output:");
}

#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[serial(mcp_test_value)]
async fn stdio_server_round_trip() -> anyhow::Result<()> {
Expand All @@ -71,7 +95,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
]),
)
.await;
mount_sse_once(
let final_mock = mount_sse_once(
&server,
responses::sse(vec![
responses::ev_assistant_message("msg-1", "rmcp echo tool completed successfully."),
Expand Down Expand Up @@ -190,6 +214,17 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {

wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

let output_item = final_mock.single_request().function_call_output(call_id);
let output_text = output_item
.get("output")
.and_then(Value::as_str)
.expect("function_call_output output should be a string");
let wrapped_payload = split_wall_time_wrapped_output(output_text);
let output_json: Value = serde_json::from_str(wrapped_payload)
.expect("wrapped MCP output should preserve structured JSON");
assert_eq!(output_json["echo"], "ECHOING: ping");
assert_eq!(output_json["env"], expected_env_value);

server.verify().await;

Ok(())
Expand Down Expand Up @@ -362,15 +397,22 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

let output_item = final_mock.single_request().function_call_output(call_id);
assert_eq!(output_item["type"], "function_call_output");
assert_eq!(output_item["call_id"], call_id);
let output = output_item["output"]
.as_array()
.expect("image MCP output should be content items");
assert_eq!(output.len(), 2);
assert_wall_time_header(
output[0]["text"]
.as_str()
.expect("first MCP image output item should be wall-time text"),
);
assert_eq!(
output_item,
output[1],
json!({
"type": "function_call_output",
"call_id": call_id,
"output": [{
"type": "input_image",
"image_url": OPENAI_PNG
}]
"type": "input_image",
"image_url": OPENAI_PNG
})
);
server.verify().await;
Expand Down Expand Up @@ -533,7 +575,8 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re
.get("output")
.and_then(Value::as_str)
.expect("function_call_output output should be a JSON string");
let output_json: Value = serde_json::from_str(output_text)
let wrapped_payload = split_wall_time_wrapped_output(output_text);
let output_json: Value = serde_json::from_str(wrapped_payload)
.expect("function_call_output output should be valid JSON");
assert_eq!(
output_json,
Expand Down
Loading
Loading