Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<!-- Version can be overridden from the command line: -p:Version=0.3.1
AssemblyVersion and FileVersion are derived automatically by the SDK
(prerelease suffixes like -beta001 are stripped for assembly versions). -->
<Version>0.12.22</Version>
<Version>0.12.23</Version>
</PropertyGroup>

<!-- NuGet package metadata (shared across all packable projects) -->
Expand Down
24 changes: 23 additions & 1 deletion src/RockBot.Tools.Mcp/McpManagementExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,29 @@ private async Task<ToolInvokeResponse> InvokeToolAsync(ToolInvokeRequest request
{
toolArgs = argsObj is JsonElement je ? je.GetRawText() : JsonSerializer.Serialize(argsObj, JsonOptions);
}
else if (!HasNonReservedKeys(args))
else if (HasNonReservedKeys(args))
{
// The call carries inner-tool fields at the top level instead of nested
// under `arguments`. Some models drop the wrapper and inline the schema
// fields directly (e.g. `mcp_invoke_tool(server_name=…, tool_name=…,
// accountId=…, query=…)`). Without this branch the call dispatches with
// no inner args and the server rejects with "X is required" — recovery's
// SchemaErrorEnricher then surfaces a missing-field error to the LLM
// even though the LLM actually supplied the field, just one level up.
// Promote every non-reserved top-level key into a synthetic arguments
// object so the call still works.
var promoted = new Dictionary<string, object?>(StringComparer.Ordinal);
foreach (var (k, v) in args)
{
if (!ReservedTopLevelKeys.Contains(k))
promoted[k] = v;
}
toolArgs = JsonSerializer.Serialize(promoted, JsonOptions);
_logger.LogInformation(
"mcp_invoke_tool: promoted {Count} flat top-level field(s) into 'arguments' for {Server}/{Tool}",
promoted.Count, serverName, toolName);
}
else
{
// The call carries only server_name + tool_name — no nested arguments wrapper
// and no flattened inner-tool fields. Some models hit this when invoking a
Expand Down
36 changes: 36 additions & 0 deletions src/RockBot.Wisp/WispExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@
// of which user created them.
try
{
File.SetUnixFileMode(fullPath,

Check warning on line 798 in src/RockBot.Wisp/WispExecutor.cs

View workflow job for this annotation

GitHub Actions / build-and-test

This call site is reachable on all platforms. 'File.SetUnixFileMode(string, UnixFileMode)' is unsupported on: 'windows'. (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca1416)

Check warning on line 798 in src/RockBot.Wisp/WispExecutor.cs

View workflow job for this annotation

GitHub Actions / build-and-test

This call site is reachable on all platforms. 'File.SetUnixFileMode(string, UnixFileMode)' is unsupported on: 'windows'. (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca1416)
UnixFileMode.UserRead | UnixFileMode.UserWrite |
UnixFileMode.GroupRead | UnixFileMode.GroupWrite |
UnixFileMode.OtherRead | UnixFileMode.OtherWrite);
Expand Down Expand Up @@ -998,6 +998,21 @@
return null;

var currentParams = step.ResolvedParams?.GetRawText() ?? "{}";

// Guard against pure invention: if the step has no params AND no enriched
// context naming candidate values, the LLM has nothing honest to draw on.
// It will reliably ignore the NO_CORRECTION instruction and fabricate a
// schema-clean but semantically empty value (e.g. {"query":"*"} or
// {"query":""} for a search tool). Decline up-front so the original
// validation error bubbles to the wisp caller, who can fix the dispatch.
if (IsEffectivelyEmptyParams(currentParams) && string.IsNullOrWhiteSpace(enrichedContext))
{
logger.LogInformation(
"Wisp {StepId}: auto-correction skipped (no params and no enriched context to draw from) for {Server}/{Tool}",
step.Id, step.Server, step.Tool);
return null;
}

var contextSection = string.IsNullOrEmpty(enrichedContext)
? ""
: $"Recovery context (schema, tool-description hints, recent session calls):\n{enrichedContext}\n\n";
Expand Down Expand Up @@ -1061,6 +1076,27 @@
}
}

/// <summary>
/// Returns true when the params JSON is missing, empty, or an empty object —
/// in any of those cases the auto-corrector has no source data to remap from.
/// </summary>
internal static bool IsEffectivelyEmptyParams(string? paramsJson)
{
if (string.IsNullOrWhiteSpace(paramsJson)) return true;
try
{
using var doc = JsonDocument.Parse(paramsJson);
if (doc.RootElement.ValueKind != JsonValueKind.Object) return false;
// Empty object → no fields to draw from.
using var e = doc.RootElement.EnumerateObject();
return !e.MoveNext();
}
catch (JsonException)
{
return false;
}
}

/// <summary>
/// Strips optional ```json / ``` fences an LLM may emit around a JSON payload.
/// </summary>
Expand Down
90 changes: 90 additions & 0 deletions tests/RockBot.Tools.Tests/McpManagementExecutorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,96 @@ public async Task InvokeTool_AcceptsNestedArgAliases(string nestedKey)
await executeTask;
}

[TestMethod]
public async Task InvokeTool_FlatTopLevelFields_PromotedIntoArguments()
{
// Some models drop the `arguments` wrapper and inline the inner-tool fields
// directly at the top level of mcp_invoke_tool (e.g. `accountId`, `query`,
// `count` sitting next to `server_name`/`tool_name`). Without promotion the
// call dispatches with no inner args and the MCP server rejects with
// "X is required" — even though the LLM did pass X, one level up.
var (executor, publisher, subscriber) = CreateExecutor();

var request = new ToolInvokeRequest
{
ToolCallId = "call-flat",
ToolName = "mcp_invoke_tool",
Arguments = """
{"server_name":"calendar-mcp","tool_name":"search_emails",
"accountId":"rockbotagent","query":"VS Live Co-Chair","count":10}
"""
};

var executeTask = executor.ExecuteAsync(request, CancellationToken.None);
await Task.Delay(100);

Assert.AreEqual(1, publisher.Published.Count, "Flat-args call must dispatch.");
var innerRequest = publisher.Published[0].Envelope.GetPayload<ToolInvokeRequest>();
Assert.IsNotNull(innerRequest);
Assert.IsNotNull(innerRequest.Arguments,
"Flat top-level fields must be promoted into 'arguments', not dropped.");
StringAssert.Contains(innerRequest.Arguments, "rockbotagent");
StringAssert.Contains(innerRequest.Arguments, "VS Live Co-Chair");
StringAssert.Contains(innerRequest.Arguments, "count");

// The wrapper keys must NOT have leaked into the inner args
Assert.IsFalse(innerRequest.Arguments.Contains("server_name"),
"Reserved wrapper key 'server_name' must not leak into inner tool args.");
Assert.IsFalse(innerRequest.Arguments.Contains("tool_name"),
"Reserved wrapper key 'tool_name' must not leak into inner tool args.");

var response = new ToolInvokeResponse
{
ToolCallId = "call-flat",
ToolName = "search_emails",
Content = "[]"
};
await subscriber.DeliverAsync($"tool.result.{_identity.Name}",
response.ToEnvelope("bridge", correlationId: publisher.Published[0].Envelope.CorrelationId));
await executeTask;
}

[TestMethod]
public async Task InvokeTool_NestedArgsTakePrecedenceOverFlatFields()
{
// When both shapes are present, the explicit `arguments` wrapper wins —
// anything stray at the top level is ignored (treating it as model noise
// is safer than mixing both into the inner call).
var (executor, publisher, subscriber) = CreateExecutor();

var request = new ToolInvokeRequest
{
ToolCallId = "call-both",
ToolName = "mcp_invoke_tool",
Arguments = """
{"server_name":"calendar-mcp","tool_name":"search_emails",
"arguments":{"accountId":"good","query":"good"},
"stray":"ignored"}
"""
};

var executeTask = executor.ExecuteAsync(request, CancellationToken.None);
await Task.Delay(100);

Assert.AreEqual(1, publisher.Published.Count);
var innerRequest = publisher.Published[0].Envelope.GetPayload<ToolInvokeRequest>();
Assert.IsNotNull(innerRequest);
Assert.IsNotNull(innerRequest.Arguments);
StringAssert.Contains(innerRequest.Arguments, "good");
Assert.IsFalse(innerRequest.Arguments.Contains("stray"),
"Stray top-level field must not be merged when 'arguments' wrapper is present.");

var response = new ToolInvokeResponse
{
ToolCallId = "call-both",
ToolName = "search_emails",
Content = "[]"
};
await subscriber.DeliverAsync($"tool.result.{_identity.Name}",
response.ToEnvelope("bridge", correlationId: publisher.Published[0].Envelope.CorrelationId));
await executeTask;
}

[TestMethod]
public async Task InvokeTool_MissingServerName_ReturnsError()
{
Expand Down
66 changes: 66 additions & 0 deletions tests/RockBot.Wisp.Tests/WispAutoCorrectTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,72 @@ public async Task ValidationFailure_LlmReturnsNoCorrectionSentinel_OriginalError
Assert.AreEqual(1, llm.CallCount, "LLM was called (and declined), not skipped");
}

[TestMethod]
public async Task ValidationFailure_EmptyParamsAndNoEnrichedContext_LlmNotCalled()
{
// When the wisp step has nothing to draw from (empty params, no enriched
// recovery context), asking the LLM to "correct" the params is pure
// invention — the cheap-tier model reliably ignores the NO_CORRECTION
// instruction and emits something like {"query":"*"} or {"query":""}.
// The executor must decline up-front and bubble the original validation
// error instead.
var llm = new ScriptedLlmClient("""{"accountId":"INVENTED","calendarId":"INVENTED","timeMin":"x","timeMax":"y"}""");
var (executor, registry, _) = CreateExecutor(llm, preflightRecovery: null);
RegisterCalendarMcp(registry, capturedArgs: null);

var definition = MakeCalendarWisp("{}");

var result = await executor.ExecuteAsync(definition, "wisp-empty-1", CancellationToken.None);

Assert.IsFalse(result.IsSuccess);
Assert.AreEqual(FailureCategory.Structural, result.FailedStep!.Error!.Category);
Assert.AreEqual(0, llm.CallCount,
"LLM auto-correct must not run when there is no source data to draw from.");
}

[TestMethod]
public async Task ValidationFailure_EmptyParamsButEnrichedContextPresent_LlmStillCalled()
{
// The "empty params" guard only fires when there is also no enriched
// context. When preflight recovery surfaces same-session info the LLM
// can honestly remap, the auto-correct LLM should still be invoked.
var llm = new CapturingScriptedLlmClient("""
{"accountId":"acct-from-context","calendarId":"primary","timeMin":"t1","timeMax":"t2"}
""");
var preflight = new StubPreflightRecovery(
filledDefaults: new Dictionary<string, object?>(),
unresolved: ["accountId", "calendarId", "timeMin", "timeMax"],
enrichedContext: "Recent successful calls in this session: list_accounts returned accountId 'acct-from-context'");

var (executor, registry, captured) = CreateExecutor(llm, preflight);
RegisterCalendarMcp(registry, captured);

var definition = MakeCalendarWisp("{}");

var result = await executor.ExecuteAsync(definition, "wisp-empty-2", CancellationToken.None);

Assert.IsTrue(result.IsSuccess,
"LLM auto-correct should run when enriched context names candidate values.");
Assert.AreEqual(1, llm.CallCount);
StringAssert.Contains(llm.LastPrompt!, "acct-from-context",
"Enriched context must be in the prompt the LLM sees.");
}

[TestMethod]
public void IsEffectivelyEmptyParams_RecognizesEmptyShapes()
{
Assert.IsTrue(WispExecutor.IsEffectivelyEmptyParams(null));
Assert.IsTrue(WispExecutor.IsEffectivelyEmptyParams(""));
Assert.IsTrue(WispExecutor.IsEffectivelyEmptyParams(" "));
Assert.IsTrue(WispExecutor.IsEffectivelyEmptyParams("{}"));
Assert.IsTrue(WispExecutor.IsEffectivelyEmptyParams("{ }"));
Assert.IsFalse(WispExecutor.IsEffectivelyEmptyParams("""{"a":1}"""));
Assert.IsFalse(WispExecutor.IsEffectivelyEmptyParams("[]"),
"Non-object params shouldn't be classified as empty — let the LLM see them.");
Assert.IsFalse(WispExecutor.IsEffectivelyEmptyParams("not json"),
"Malformed params shouldn't be classified as empty — let the LLM see them.");
}

[TestMethod]
public async Task PreflightRecovery_FillsEnvDefault_NoLlmCallNeeded()
{
Expand Down
Loading