Skip to content

Commit 52051d4

Browse files
authored
Merge pull request #2181 from mnolet/fix/slash-commands-on-warm-containers
fix(poll-loop): slash commands silently broken on warm containers
2 parents eba5b78 + 64769fe commit 52051d4

2 files changed

Lines changed: 38 additions & 10 deletions

File tree

container/agent-runner/src/formatter.ts

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,18 @@ export function isClearCommand(msg: MessageInRow): boolean {
6666
return text.toLowerCase().startsWith('/clear');
6767
}
6868

69+
/**
70+
* True for any chat that needs the outer loop's command path: /clear plus
71+
* admin/passthrough slash commands the SDK can only dispatch when they are
72+
* a query's first input. Used by the follow-up poller to bail out and let
73+
* the outer loop reopen the query.
74+
*/
75+
export function isRunnerCommand(msg: MessageInRow): boolean {
76+
if (msg.kind !== 'chat' && msg.kind !== 'chat-sdk') return false;
77+
const cat = categorizeMessage(msg).category;
78+
return cat === 'admin' || cat === 'passthrough';
79+
}
80+
6981
// eslint-disable-next-line @typescript-eslint/no-explicit-any
7082
function extractSenderId(msg: MessageInRow, content: any): string | null {
7183
const raw: string | null = content?.senderId || content?.author?.userId || null;

container/agent-runner/src/poll-loop.ts

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import {
77
migrateLegacyContinuation,
88
setContinuation,
99
} from './db/session-state.js';
10-
import { formatMessages, extractRouting, categorizeMessage, isClearCommand, stripInternalTags, type RoutingContext } from './formatter.js';
10+
import { formatMessages, extractRouting, categorizeMessage, isClearCommand, isRunnerCommand, stripInternalTags, type RoutingContext } from './formatter.js';
1111
import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js';
1212

1313
const POLL_INTERVAL_MS = 1000;
@@ -255,30 +255,46 @@ async function processQuery(
255255
let done = false;
256256

257257
// Concurrent polling: push follow-ups into the active query as they arrive.
258-
// We do NOT force-end the stream on silence — keeping the query open is
259-
// strictly cheaper than close+reopen (no cold prompt cache, no reconnect).
258+
// We do NOT force-end the stream on silence — keeping the query open avoids
259+
// re-spawning the SDK subprocess (~few seconds) and re-loading the .jsonl
260+
// transcript on every turn. The Anthropic prompt cache is server-side with
261+
// a 5-min TTL keyed on prefix hash, so stream lifecycle does NOT affect
262+
// cache lifetime — close+reopen within 5 min still gets cache hits.
260263
// Stream liveness is decided host-side via the heartbeat file + processing
261264
// claim age (see src/host-sweep.ts); if something is truly stuck, the host
262265
// will kill the container and messages get reset to pending.
263266
let pollInFlight = false;
267+
let endedForCommand = false;
264268
const pollHandle = setInterval(() => {
265-
if (done || pollInFlight) return;
269+
if (done || pollInFlight || endedForCommand) return;
266270
pollInFlight = true;
267271

268272
void (async () => {
269273
try {
270-
// Skip system messages (MCP tool responses) and /clear (needs fresh query).
274+
const pending = getPendingMessages();
275+
276+
// Slash commands need a fresh query: /clear resets the SDK's
277+
// resume id (fixed at sdkQuery() time); admin/passthrough commands
278+
// (/compact, /cost, …) only dispatch when they're the first input
279+
// of a query — pushed mid-stream they arrive as plain text and
280+
// the SDK never runs them. End the stream and leave the rows
281+
// pending; the outer loop handles them on next iteration via the
282+
// canonical command path + formatMessagesWithCommands.
283+
if (pending.some((m) => isRunnerCommand(m))) {
284+
log('Pending slash command — ending stream so outer loop can process');
285+
endedForCommand = true;
286+
query.end();
287+
return;
288+
}
289+
290+
// Skip system messages (MCP tool responses).
271291
// Thread routing is the router's concern — if a message landed in this
272292
// session, the agent should see it. Per-thread sessions already isolate
273293
// threads into separate containers; shared sessions intentionally merge
274294
// everything. Filtering on thread_id here caused deadlocks when the
275295
// initial batch and follow-ups had mismatched thread_ids (e.g. a
276296
// host-generated welcome trigger with null thread vs a Discord DM reply).
277-
const newMessages = getPendingMessages().filter((m) => {
278-
if (m.kind === 'system') return false;
279-
if ((m.kind === 'chat' || m.kind === 'chat-sdk') && isClearCommand(m)) return false;
280-
return true;
281-
});
297+
const newMessages = pending.filter((m) => m.kind !== 'system');
282298
if (newMessages.length === 0) return;
283299

284300
const newIds = newMessages.map((m) => m.id);

0 commit comments

Comments
 (0)