Skip to content

Commit 32c56e6

Browse files
authored
Merge pull request #1693 from automagik-dev/fix/exterminate-name-flag
fix(agent): retire --name flag + distinguish auto_resume_disabled vs null_session
2 parents 0b24b94 + fc9eae5 commit 32c56e6

11 files changed

Lines changed: 259 additions & 42 deletions

src/db/migrations/044_phase_b_flip_defaults.test.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@ describe.skipIf(!DB_AVAILABLE)('migration 044 — Phase B: flip auto_resume + re
4242
await cleanup();
4343
});
4444

45-
test('fresh DB: agents.auto_resume column default is false after migration 044', async () => {
45+
test('fresh DB: agents.auto_resume column default is true after migration 055', async () => {
46+
// Migration 055 (PR #1693) flipped the column default from `false` to `true`.
47+
// Felipe directive 2026-05-07: default-off was a bug shape — 52/53 agents
48+
// had auto_resume=false, meaning no agent could resume without first
49+
// running `genie agent recover`. The migration runner applies every
50+
// migration under the test schema, so we observe the post-055 state.
4651
const sql = await getConnection();
4752
const rows = await sql<{ column_default: string | null }[]>`
4853
SELECT column_default
@@ -53,10 +58,12 @@ describe.skipIf(!DB_AVAILABLE)('migration 044 — Phase B: flip auto_resume + re
5358
`;
5459
expect(rows.length).toBe(1);
5560
// Postgres reports boolean defaults as 'true' / 'false' literal strings.
56-
expect(rows[0].column_default).toBe('false');
61+
expect(rows[0].column_default).toBe('true');
5762
});
5863

59-
test('fresh-INSERT agent row inherits auto_resume=false', async () => {
64+
test('fresh-INSERT agent row inherits auto_resume=true (post-055 default)', async () => {
65+
// Migration 055 flipped the default to true. Fresh inserts that don't
66+
// explicitly set auto_resume now inherit the safe value.
6067
const sql = await getConnection();
6168
const id = `test-fresh-${Date.now()}`;
6269
await sql`
@@ -66,7 +73,7 @@ describe.skipIf(!DB_AVAILABLE)('migration 044 — Phase B: flip auto_resume + re
6673
const rows = await sql<{ auto_resume: boolean | null }[]>`
6774
SELECT auto_resume FROM agents WHERE id = ${id}
6875
`;
69-
expect(rows[0].auto_resume).toBe(false);
76+
expect(rows[0].auto_resume).toBe(true);
7077
});
7178

7279
test('live row (last_state_change < 1h, non-terminal state) preserves auto_resume=true', async () => {
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
-- 055_default_auto_resume_true.sql — Flip the agents.auto_resume default to
2+
-- TRUE and backfill every existing FALSE row to TRUE.
3+
--
4+
-- Why
5+
-- ---
6+
-- DB evidence (2026-05-07): 52 of 53 agents had auto_resume=false. The
7+
-- chokepoint `shouldResume()` treats false as "operator paused or scheduler
8+
-- exhausted retry budget" — but the column was effectively defaulting off,
9+
-- so no agent could be resumed without first running `genie agent recover`
10+
-- to flip the flag. Felipe directive (2026-05-07): default-on is the bug
11+
-- shape; flip every existing row to true and change the column DEFAULT so
12+
-- newly-spawned agents get the safe value out of the gate.
13+
--
14+
-- This migration also lands the trace-confirmed bug fix where
15+
-- `MissingResumeSessionError` reported reason='null_session' on rows whose
16+
-- session UUID was perfectly intact — auto_resume=false was the actual
17+
-- precondition. The protocol-router error enum now distinguishes the two
18+
-- (`auto_resume_disabled` vs `null_session`); this migration removes the
19+
-- silent default-off that triggered the misleading message in the first
20+
-- place.
21+
--
22+
-- Idempotent: re-running this migration is a no-op on rows that are already
23+
-- true and on a column whose default is already true.
24+
25+
BEGIN;
26+
27+
-- 1. Flip every existing false row. No WHERE narrowing — Felipe directive
28+
-- is to flip ALL of them so resume works out-of-the-box.
29+
UPDATE agents
30+
SET auto_resume = true
31+
WHERE auto_resume = false;
32+
33+
-- 2. Change the column DEFAULT so future spawns inherit the safe value.
34+
-- NULLs (legacy rows that pre-date the column) become true — same as
35+
-- the safe-default the runtime treats them as.
36+
ALTER TABLE agents ALTER COLUMN auto_resume SET DEFAULT true;
37+
UPDATE agents SET auto_resume = true WHERE auto_resume IS NULL;
38+
39+
COMMIT;

src/lib/agent-directory.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,27 @@ export async function resolve(name: string): Promise<ResolvedAgent | null> {
348348
if (templateTeam) entry.team = templateTeam;
349349
return { entry, builtin: false };
350350
}
351+
352+
// 1b. Fallback to custom_name when role didn't match. Spawned agents
353+
// routinely have a custom_name (e.g. "felipe", "fix-resume-name-flag")
354+
// that diverges from `role`; without this fallback, `genie agent
355+
// recover felipe` failed at the resolver step even though the row was
356+
// there (Felipe directive 2026-05-07).
357+
const byCustomName = await sql`
358+
SELECT role, custom_name, metadata, created_at FROM agents
359+
WHERE custom_name = ${name}
360+
ORDER BY (CASE WHEN position('dir:' in id) = 1 THEN 0 ELSE 1 END), started_at DESC
361+
LIMIT 1
362+
`;
363+
if (byCustomName.length > 0) {
364+
const row = byCustomName[0];
365+
const meta = parseMetadata(row.metadata);
366+
const createdAt =
367+
row.created_at instanceof Date ? row.created_at.toISOString() : (row.created_at as string | undefined);
368+
const entry = roleToEntry(typeof row.role === 'string' ? row.role : name, undefined, meta, createdAt);
369+
if (templateTeam) entry.team = templateTeam;
370+
return { entry, builtin: false };
371+
}
351372
} catch {
352373
/* PG unavailable — fall through to built-ins */
353374
}

src/lib/executor-registry.ts

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -259,13 +259,19 @@ export async function updateClaudeSessionId(executorId: string, sessionId: strin
259259

260260
/**
261261
* Identity passed to the JSONL fallback scanner. Mirrors the canonical
262-
* `(team, custom_name)` columns on `agents`. Both fields must be non-null
263-
* to attempt a match — the fallback refuses to return another agent's
264-
* transcript when ownership is unknown.
262+
* `(team, custom_name)` columns on `agents`. `customName` is required (the
263+
* scanner refuses to return another agent's transcript when role identity
264+
* is unknown). `team` may be null for legacy / orphan rows where no team
265+
* was ever recorded — in that case the scanner matches on `agentName` alone
266+
* and additionally accepts JSONL bodies whose `teamName` is null.
265267
*/
266268
export interface ResumeFallbackIdentity {
267-
/** Agent's `team` column. Populated by `findOrCreateAgent`. */
268-
team: string;
269+
/**
270+
* Agent's `team` column. Populated by `findOrCreateAgent`. May be null
271+
* for legacy/orphan rows; when null, the scanner relaxes to a customName
272+
* match only.
273+
*/
274+
team: string | null;
269275
/** Agent's `custom_name` column. The role-or-name part of the identity. */
270276
customName: string;
271277
}
@@ -432,7 +438,12 @@ async function defaultScanForSession(cwd: string, identity: ResumeFallbackIdenti
432438

433439
for (const candidate of sorted) {
434440
const { teamName, agentName } = await readJsonlIdentity(candidate.full);
435-
if (teamName !== identity.team || agentName !== identity.customName) continue;
441+
if (agentName !== identity.customName) continue;
442+
// Identity.team is allowed to be null for orphan / legacy rows
443+
// (Felipe directive 2026-05-07): in that case match on agentName alone.
444+
// When identity.team is set, require strict teamName equality so we
445+
// don't attach team A's runtime to team B's transcript.
446+
if (identity.team !== null && teamName !== identity.team) continue;
436447
return candidate.name.replace(/\.jsonl$/, '');
437448
}
438449
return null;
@@ -467,21 +478,23 @@ type ResumeRow = {
467478
/**
468479
* Try the JSONL on-disk fallback for an agent whose DB resume read missed.
469480
* Returns the recovered session UUID if a matching JSONL is found, or null
470-
* if no cwd / no identity / no JSONL match. Emits `resume.recovered_via_jsonl`
471-
* on hit.
481+
* if no cwd / no customName / no JSONL match. Emits
482+
* `resume.recovered_via_jsonl` on hit.
472483
*
473-
* Identity is `(team, custom_name)` and BOTH must be present — a missing
474-
* identity makes ownership unverifiable, and we refuse to attach an agent's
475-
* runtime to another agent's transcript.
484+
* Identity is `(team, custom_name)`. `custom_name` is required (the scanner
485+
* refuses to attach without role identity), but `team` may be null —
486+
* legacy/orphan rows that never got a team assigned still resume off-disk
487+
* by matching on `agentName` alone (Felipe directive 2026-05-07: previous
488+
* strict-both check stranded rows where team got dropped).
476489
*/
477490
async function tryJsonlFallback(agentId: string, row: ResumeRow | null, actor: string): Promise<string | null> {
478491
const cwd = row?.repo_path ?? null;
479492
if (!cwd) return null;
480493

481494
const team = row?.team ?? null;
482495
const customName = row?.custom_name ?? null;
483-
const identity: ResumeFallbackIdentity | null = team && customName ? { team, customName } : null;
484-
if (!identity) return null;
496+
if (!customName) return null;
497+
const identity: ResumeFallbackIdentity = { team, customName };
485498

486499
const scanner = _resumeJsonlScannerDeps.scanForSession ?? defaultScanForSession;
487500
const recoveredSessionId = await scanner(cwd, identity);

src/lib/protocol-router.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,37 @@ describe.skipIf(!DB_AVAILABLE)('pg', () => {
526526
expect(legacyAlias.reason).toBe('no_session_id');
527527
expect(legacyAlias.message).toContain('no_session_id');
528528
});
529+
530+
// Felipe directive 2026-05-07: when an agent has auto_resume=false but
531+
// its session UUID is intact, the error must NOT pretend the session is
532+
// lost. Previously every "I can't resume" outcome collapsed onto
533+
// null_session, and operators chased nonexistent missing-session bugs.
534+
test('MissingResumeSessionError(reason=auto_resume_disabled) tells the operator the session is intact and to run recover', async () => {
535+
const { MissingResumeSessionError } = await import('./protocol-router.js');
536+
537+
const paused = new MissingResumeSessionError('paused-agent', undefined, 'auto_resume_disabled');
538+
expect(paused.reason).toBe('auto_resume_disabled');
539+
expect(paused.entityId).toBe('paused-agent');
540+
// Message MUST distinguish from the "session lost" case.
541+
expect(paused.message).toContain('auto_resume is disabled');
542+
expect(paused.message).toContain('session UUID is intact');
543+
expect(paused.message).toContain('genie agent recover paused-agent');
544+
// Must NOT mislead the operator into thinking the session is lost.
545+
expect(paused.message).not.toContain('no claude_session_id recorded');
546+
expect(paused.message).not.toContain('genie reset');
547+
});
548+
549+
test('MissingResumeSessionError(reason=null_session) keeps the session-lost runbook hint', async () => {
550+
const { MissingResumeSessionError } = await import('./protocol-router.js');
551+
552+
const lost = new MissingResumeSessionError('lost-agent', undefined, 'null_session');
553+
expect(lost.reason).toBe('null_session');
554+
// Genuine session-loss path keeps the legacy `genie reset` runbook hint.
555+
expect(lost.message).toContain('no claude_session_id recorded');
556+
expect(lost.message).toContain('genie reset lost-agent');
557+
// And must NOT confuse itself with the auto_resume_disabled message.
558+
expect(lost.message).not.toContain('auto_resume is disabled');
559+
});
529560
});
530561

531562
// ---------------------------------------------------------------------------

src/lib/protocol-router.ts

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,28 @@ interface DeliveryResult {
4747
* of quietly proceeding with a fresh spawn.
4848
*
4949
* `reason` names which precondition failed:
50-
* - `no_executor` — agent has no `current_executor_id` (never spawned, or row pruned)
51-
* - `null_session` — executor row exists but `claude_session_id` is null
52-
* - `no_session_id` — legacy alias kept for callers that read `agent.claudeSessionId`
53-
* directly before Group 1's single-reader helper lands.
50+
* - `no_executor` — agent has no `current_executor_id` (never spawned, or row pruned)
51+
* - `null_session` — executor row exists but `claude_session_id` is null (genuine session loss)
52+
* - `no_session_id` — legacy alias kept for callers that read `agent.claudeSessionId`
53+
* directly before Group 1's single-reader helper lands.
54+
* - `auto_resume_disabled` — executor HAS a session_id, but `agents.auto_resume = false`
55+
* (operator paused or scheduler exhausted retries). The session
56+
* is intact — flip `auto_resume = true` via `genie agent recover`.
5457
*/
55-
export type MissingResumeSessionReason = 'no_executor' | 'null_session' | 'no_session_id';
58+
export type MissingResumeSessionReason = 'no_executor' | 'null_session' | 'no_session_id' | 'auto_resume_disabled';
59+
60+
function buildResumeErrorMessage(workerId: string, suffix: string, reason: MissingResumeSessionReason): string {
61+
if (reason === 'auto_resume_disabled') {
62+
return (
63+
`Cannot resume worker "${workerId}"${suffix}: auto_resume is disabled, but the session UUID is intact ` +
64+
`(reason: auto_resume_disabled). Run \`genie agent recover ${workerId} --yes\` to flip the flag and resume.`
65+
);
66+
}
67+
return (
68+
`Cannot resume worker "${workerId}"${suffix}: executor has no claude_session_id recorded (reason: ${reason}). ` +
69+
`This usually means the worker predates the session-sync hook. Run \`genie reset ${workerId}\` or re-spawn the worker to recover.`
70+
);
71+
}
5672

5773
export class MissingResumeSessionError extends Error {
5874
readonly workerId: string;
@@ -62,9 +78,7 @@ export class MissingResumeSessionError extends Error {
6278

6379
constructor(workerId: string, recipientId?: string, reason: MissingResumeSessionReason = 'null_session') {
6480
const suffix = recipientId ? ` (recipient "${recipientId}")` : '';
65-
super(
66-
`Cannot resume worker "${workerId}"${suffix}: executor has no claude_session_id recorded (reason: ${reason}). This usually means the worker predates the session-sync hook. Run \`genie reset ${workerId}\` or re-spawn the worker to recover.`,
67-
);
81+
super(buildResumeErrorMessage(workerId, suffix, reason));
6882
this.name = 'MissingResumeSessionError';
6983
this.workerId = workerId;
7084
this.entityId = workerId;
@@ -331,7 +345,21 @@ export async function resolveResumeSessionId(
331345
const agentIdToProbe = worker?.id ?? `dir:${recipientId}`;
332346
const decision = await shouldResume(agentIdToProbe);
333347
if (worker && (await isExecutorResumable(worker))) {
334-
if (!decision.sessionId) throw new MissingResumeSessionError(worker.id, recipientId);
348+
// CodeRabbit-flagged gap (PR #1693): the chokepoint may return
349+
// `resume: false` with a sessionId still attached (e.g.
350+
// `auto_resume_disabled` — operator explicitly paused). Without checking
351+
// `decision.resume`, the spawn-side path would silently resume a paused
352+
// worker. Propagate the chokepoint reason onto the error so operators
353+
// see "auto_resume disabled — run recover" instead of "session lost".
354+
if (!decision.resume || !decision.sessionId) {
355+
const errReason: MissingResumeSessionReason =
356+
decision.reason === 'unknown_agent'
357+
? 'no_executor'
358+
: decision.reason === 'auto_resume_disabled'
359+
? 'auto_resume_disabled'
360+
: 'null_session';
361+
throw new MissingResumeSessionError(worker.id, recipientId, errReason);
362+
}
335363
}
336364
return decision.sessionId;
337365
}

src/lib/provider-adapters.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,58 @@ describe('buildClaudeCommand', () => {
141141
expect(result.command).toContain("--model 'opus'");
142142
});
143143

144+
// Felipe directive 2026-05-07: --name was retired because CC stored it as
145+
// `customTitle` in the JSONL header and then composed the resume hint as
146+
// `claude --resume "<customTitle>"` instead of the canonical UUID, breaking
147+
// session resume. Even when callers pass `name`, the launch command MUST
148+
// NOT emit --name. Sessions are identified exclusively by --session-id /
149+
// --resume <uuid>.
150+
it('never emits --name even when params.name is set', () => {
151+
const result = buildClaudeCommand({
152+
provider: 'claude',
153+
team: 'work',
154+
role: 'implementor',
155+
name: 'felipe',
156+
});
157+
expect(result.command).not.toContain('--name ');
158+
});
159+
160+
it('never emits --name on a fresh-spawn command', () => {
161+
const result = buildClaudeCommand({
162+
provider: 'claude',
163+
team: 'work',
164+
role: 'implementor',
165+
});
166+
expect(result.command).not.toContain('--name ');
167+
});
168+
169+
it('uses --session-id (not --name) for session identity on fresh spawn', () => {
170+
const result = buildClaudeCommand({
171+
provider: 'claude',
172+
team: 'work',
173+
role: 'implementor',
174+
sessionId: '11111111-2222-3333-4444-555555555555',
175+
name: 'should-be-ignored',
176+
});
177+
expect(result.command).toContain('--session-id');
178+
expect(result.command).toContain("'11111111-2222-3333-4444-555555555555'");
179+
expect(result.command).not.toContain('--name ');
180+
expect(result.command).not.toContain("'should-be-ignored'");
181+
});
182+
183+
it('uses --resume (not --name) for session identity on resume', () => {
184+
const result = buildClaudeCommand({
185+
provider: 'claude',
186+
team: 'work',
187+
role: 'implementor',
188+
resume: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
189+
name: 'felipe',
190+
});
191+
expect(result.command).toContain('--resume');
192+
expect(result.command).toContain("'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'");
193+
expect(result.command).not.toContain('--name ');
194+
});
195+
144196
it('includes --append-system-prompt-file by default when systemPromptFile is set', () => {
145197
const result = buildClaudeCommand({
146198
provider: 'claude',

src/lib/provider-adapters.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,13 @@ export interface SpawnParams {
105105
model?: string;
106106
/** Initial prompt to send as the first user message (Claude Code positional [prompt] arg). */
107107
initialPrompt?: string;
108-
/** Display name for the CC session (emits --name). Used in /resume and terminal title. */
108+
/**
109+
* @deprecated 2026-05-07 — `--name` was retired because CC stored it as
110+
* `customTitle` in the JSONL header and then composed `claude --resume
111+
* "<customTitle>"` instead of the canonical UUID, breaking session resume.
112+
* The field remains in the schema for backward compatibility but is no
113+
* longer emitted on the launch command. Always use `sessionId` / `resume`.
114+
*/
109115
name?: string;
110116
/** Claude Code permissions (allow/deny lists with Bash() patterns). Merged into --settings. */
111117
permissions?: { allow?: string[]; deny?: string[] };
@@ -389,7 +395,10 @@ function appendSessionFlags(parts: string[], params: SpawnParams): void {
389395
const claudeAgentFlag = params.agentTemplate ?? params.role;
390396
if (claudeAgentFlag) parts.push('--agent', escapeShellArg(claudeAgentFlag));
391397
if (params.model) parts.push('--model', escapeShellArg(params.model));
392-
if (params.name) parts.push('--name', escapeShellArg(params.name));
398+
// NOTE: --name intentionally NOT emitted. CC stores --name as `customTitle`
399+
// in the JSONL header and then suggests `claude --resume "<customTitle>"`
400+
// instead of the canonical UUID, clobbering session resume (Felipe directive
401+
// 2026-05-07). The session UUID flows through --session-id / --resume above.
393402
}
394403

395404
// Inject hook dispatch + permissions via --settings (deep-merges with existing settings).

src/lib/team-lead-command.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ describe('buildTeamLeadCommand resume behavior', () => {
142142
const cmd = buildTeamLeadCommand('test-team', { promptMode: 'append' });
143143
expect(cmd).not.toContain('--resume');
144144
expect(cmd).not.toContain('--session-id');
145-
expect(cmd).toContain("--name 'test-team'");
145+
// --name retired (Felipe directive 2026-05-07): CC's customTitle
146+
// clobbered the UUID resume hint. Always use --session-id / --resume.
147+
expect(cmd).not.toContain('--name ');
146148
});
147149

148150
test('sessionId without resume emits --session-id <uuid>', () => {

src/lib/team-lead-command.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,11 @@ export function buildTeamLeadCommand(teamName: string, options?: BuildTeamLeadCo
6767
'--permission-mode auto',
6868
];
6969

70-
// Session name for CC's /resume and terminal title
71-
parts.push(`--name ${shellQuote(sanitized)}`);
70+
// NOTE: --name intentionally omitted. CC stores --name as `customTitle` in
71+
// the JSONL header and then composes its resume hint as
72+
// `claude --resume "<customTitle>"` instead of the canonical UUID, breaking
73+
// session resumption (Felipe directive 2026-05-07). Always use --session-id
74+
// / --resume <uuid>.
7275

7376
if (options?.sessionId) {
7477
const flag = options.resume ? '--resume' : '--session-id';

0 commit comments

Comments
 (0)