Merge pull request #2181 from mnolet/fix/slash-commands-on-warm-containers

gavrielc · web-flow · commit 52051d4aa5fd · 2026-05-02T21:37:31.000+03:00
fix(poll-loop): slash commands silently broken on warm containers
diff --git a/container/agent-runner/src/formatter.ts b/container/agent-runner/src/formatter.ts
@@ -66,6 +66,18 @@ export function isClearCommand(msg: MessageInRow): boolean {
   return text.toLowerCase().startsWith('/clear');
 }
 
+/**
+ * True for any chat that needs the outer loop's command path: /clear plus
+ * admin/passthrough slash commands the SDK can only dispatch when they are
+ * a query's first input. Used by the follow-up poller to bail out and let
+ * the outer loop reopen the query.
+ */
+export function isRunnerCommand(msg: MessageInRow): boolean {
+  if (msg.kind !== 'chat' && msg.kind !== 'chat-sdk') return false;
+  const cat = categorizeMessage(msg).category;
+  return cat === 'admin' || cat === 'passthrough';
+}
+
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 function extractSenderId(msg: MessageInRow, content: any): string | null {
   const raw: string | null = content?.senderId || content?.author?.userId || null;
diff --git a/container/agent-runner/src/poll-loop.ts b/container/agent-runner/src/poll-loop.ts
@@ -7,7 +7,7 @@ import {
   migrateLegacyContinuation,
   setContinuation,
 } from './db/session-state.js';
-import { formatMessages, extractRouting, categorizeMessage, isClearCommand, stripInternalTags, type RoutingContext } from './formatter.js';
+import { formatMessages, extractRouting, categorizeMessage, isClearCommand, isRunnerCommand, stripInternalTags, type RoutingContext } from './formatter.js';
 import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js';
 
 const POLL_INTERVAL_MS = 1000;
@@ -255,30 +255,46 @@ async function processQuery(
   let done = false;
 
   // Concurrent polling: push follow-ups into the active query as they arrive.
-  // We do NOT force-end the stream on silence — keeping the query open is
-  // strictly cheaper than close+reopen (no cold prompt cache, no reconnect).
+  // We do NOT force-end the stream on silence — keeping the query open avoids
+  // re-spawning the SDK subprocess (~few seconds) and re-loading the .jsonl
+  // transcript on every turn. The Anthropic prompt cache is server-side with
+  // a 5-min TTL keyed on prefix hash, so stream lifecycle does NOT affect
+  // cache lifetime — close+reopen within 5 min still gets cache hits.
   // Stream liveness is decided host-side via the heartbeat file + processing
   // claim age (see src/host-sweep.ts); if something is truly stuck, the host
   // will kill the container and messages get reset to pending.
   let pollInFlight = false;
+  let endedForCommand = false;
   const pollHandle = setInterval(() => {
-    if (done || pollInFlight) return;
+    if (done || pollInFlight || endedForCommand) return;
     pollInFlight = true;
 
     void (async () => {
       try {
-        // Skip system messages (MCP tool responses) and /clear (needs fresh query).
+        const pending = getPendingMessages();
+
+        // Slash commands need a fresh query: /clear resets the SDK's
+        // resume id (fixed at sdkQuery() time); admin/passthrough commands
+        // (/compact, /cost, …) only dispatch when they're the first input
+        // of a query — pushed mid-stream they arrive as plain text and
+        // the SDK never runs them. End the stream and leave the rows
+        // pending; the outer loop handles them on next iteration via the
+        // canonical command path + formatMessagesWithCommands.
+        if (pending.some((m) => isRunnerCommand(m))) {
+          log('Pending slash command — ending stream so outer loop can process');
+          endedForCommand = true;
+          query.end();
+          return;
+        }
+
+        // Skip system messages (MCP tool responses).
         // Thread routing is the router's concern — if a message landed in this
         // session, the agent should see it. Per-thread sessions already isolate
         // threads into separate containers; shared sessions intentionally merge
         // everything. Filtering on thread_id here caused deadlocks when the
         // initial batch and follow-ups had mismatched thread_ids (e.g. a
         // host-generated welcome trigger with null thread vs a Discord DM reply).
-        const newMessages = getPendingMessages().filter((m) => {
-          if (m.kind === 'system') return false;
-          if ((m.kind === 'chat' || m.kind === 'chat-sdk') && isClearCommand(m)) return false;
-          return true;
-        });
+        const newMessages = pending.filter((m) => m.kind !== 'system');
         if (newMessages.length === 0) return;
 
         const newIds = newMessages.map((m) => m.id);