|
7 | 7 | migrateLegacyContinuation, |
8 | 8 | setContinuation, |
9 | 9 | } from './db/session-state.js'; |
10 | | -import { formatMessages, extractRouting, categorizeMessage, isClearCommand, stripInternalTags, type RoutingContext } from './formatter.js'; |
| 10 | +import { formatMessages, extractRouting, categorizeMessage, isClearCommand, isRunnerCommand, stripInternalTags, type RoutingContext } from './formatter.js'; |
11 | 11 | import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js'; |
12 | 12 |
|
13 | 13 | const POLL_INTERVAL_MS = 1000; |
@@ -255,30 +255,46 @@ async function processQuery( |
255 | 255 | let done = false; |
256 | 256 |
|
257 | 257 | // Concurrent polling: push follow-ups into the active query as they arrive. |
258 | | - // We do NOT force-end the stream on silence — keeping the query open is |
259 | | - // strictly cheaper than close+reopen (no cold prompt cache, no reconnect). |
| 258 | + // We do NOT force-end the stream on silence — keeping the query open avoids |
| 259 | + // re-spawning the SDK subprocess (~few seconds) and re-loading the .jsonl |
| 260 | + // transcript on every turn. The Anthropic prompt cache is server-side with |
| 261 | + // a 5-min TTL keyed on prefix hash, so stream lifecycle does NOT affect |
| 262 | + // cache lifetime — close+reopen within 5 min still gets cache hits. |
260 | 263 | // Stream liveness is decided host-side via the heartbeat file + processing |
261 | 264 | // claim age (see src/host-sweep.ts); if something is truly stuck, the host |
262 | 265 | // will kill the container and messages get reset to pending. |
263 | 266 | let pollInFlight = false; |
| 267 | + let endedForCommand = false; |
264 | 268 | const pollHandle = setInterval(() => { |
265 | | - if (done || pollInFlight) return; |
| 269 | + if (done || pollInFlight || endedForCommand) return; |
266 | 270 | pollInFlight = true; |
267 | 271 |
|
268 | 272 | void (async () => { |
269 | 273 | try { |
270 | | - // Skip system messages (MCP tool responses) and /clear (needs fresh query). |
| 274 | + const pending = getPendingMessages(); |
| 275 | + |
| 276 | + // Slash commands need a fresh query: /clear resets the SDK's |
| 277 | + // resume id (fixed at sdkQuery() time); admin/passthrough commands |
| 278 | + // (/compact, /cost, …) only dispatch when they're the first input |
| 279 | + // of a query — pushed mid-stream they arrive as plain text and |
| 280 | + // the SDK never runs them. End the stream and leave the rows |
| 281 | + // pending; the outer loop handles them on next iteration via the |
| 282 | + // canonical command path + formatMessagesWithCommands. |
| 283 | + if (pending.some((m) => isRunnerCommand(m))) { |
| 284 | + log('Pending slash command — ending stream so outer loop can process'); |
| 285 | + endedForCommand = true; |
| 286 | + query.end(); |
| 287 | + return; |
| 288 | + } |
| 289 | + |
| 290 | + // Skip system messages (MCP tool responses). |
271 | 291 | // Thread routing is the router's concern — if a message landed in this |
272 | 292 | // session, the agent should see it. Per-thread sessions already isolate |
273 | 293 | // threads into separate containers; shared sessions intentionally merge |
274 | 294 | // everything. Filtering on thread_id here caused deadlocks when the |
275 | 295 | // initial batch and follow-ups had mismatched thread_ids (e.g. a |
276 | 296 | // host-generated welcome trigger with null thread vs a Discord DM reply). |
277 | | - const newMessages = getPendingMessages().filter((m) => { |
278 | | - if (m.kind === 'system') return false; |
279 | | - if ((m.kind === 'chat' || m.kind === 'chat-sdk') && isClearCommand(m)) return false; |
280 | | - return true; |
281 | | - }); |
| 297 | + const newMessages = pending.filter((m) => m.kind !== 'system'); |
282 | 298 | if (newMessages.length === 0) return; |
283 | 299 |
|
284 | 300 | const newIds = newMessages.map((m) => m.id); |
|
0 commit comments