Skip to content

Commit f2fe904

Browse files
committed
feat(g6): tighten protocol-router to identity-only contracts
Wish #175 retire-session-names-id-only Group 6. - findSpawnTemplate(worker) — drops recipientId param; matches templates on (worker.team, worker.role) only. Returns null when worker is null or missing team/role; caller surfaces "unknown agent" rather than guessing. - cleanupDeadWorkers(agentId) — collapses to a single registry.get(id) + isPaneAlive check. Role/team fuzz removed; caller resolves at the CLI boundary. - resolveResumeSessionId(worker, template) — requires non-null worker with a canonical id. Drops the dir:\${recipientId} fallback for null workers (master persistence still works via the dir:<name> id-shape on the worker row itself). - ensureWorkerAlive bails early when worker is null after live-fuzzy match misses; lockedSpawnWorker only cleans up the worker.id it owns. - Tests: replace the master-aware-fallback fixture set with identity-only contracts (dir:<name> id on a worker row, null-worker rejection, non-claude provider, regression guard for live worker.id) plus a new G6 contract test that recipient-name without a worker no longer triggers an auto-spawn. Validation: bun test src/lib/protocol-router.test.ts src/lib/protocol-router-spawn.test.ts → 10 pass / 51 skip / 0 fail (existing describe.skip pending #175 G3 fixture migration).
1 parent a57b5ec commit f2fe904

2 files changed

Lines changed: 147 additions & 102 deletions

File tree

src/lib/protocol-router.test.ts

Lines changed: 103 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -844,31 +844,13 @@ describe.skip('pg — TODO retire-session-names #175: rewrite fixtures for UUID
844844
});
845845

846846
// ---------------------------------------------------------------------------
847-
// Master-aware resume (Group 1, master-aware-spawn wish):
848-
// resolveResumeSessionId must probe `dir:<recipientId>` when worker == null.
847+
// resolveResumeSessionId — identity-only contract (wish #175 G6)
848+
// Caller resolves name → id at the CLI boundary; this helper requires a
849+
// non-null worker with a canonical id. The pre-G6 `dir:<recipientId>`
850+
// fallback for null workers is removed.
849851
// ---------------------------------------------------------------------------
850852

851-
describe('resolveResumeSessionId (master-aware fallback)', () => {
852-
/**
853-
* Insert a `dir:<name>` master agent row directly. Mirrors
854-
* `agent-directory.add()` but skips the `agents.yaml` round-trip so the
855-
* test keeps a tight surface around the chokepoint behavior.
856-
* Crucially sets `auto_resume=true` (fresh-DB default is `false` since
857-
* migration 044) so the chokepoint returns `resume=true` for permanent
858-
* rows that have a session UUID on file.
859-
*/
860-
async function seedMasterDirRow(name: string, opts: { team: string; repoPath: string }): Promise<void> {
861-
const { getConnection } = await import('./db.js');
862-
const sql = await getConnection();
863-
await sql`
864-
INSERT INTO agents (id, role, custom_name, team, repo_path, started_at, auto_resume, state, metadata)
865-
VALUES (
866-
${`dir:${name}`}, ${name}, ${name}, ${opts.team}, ${opts.repoPath},
867-
now(), true, ${null}, ${sql.json({})}
868-
)
869-
`;
870-
}
871-
853+
describe('resolveResumeSessionId (identity-only)', () => {
872854
function templateFor(role: string, team: string): WorkerTemplate {
873855
return {
874856
id: `${team}-${role}`,
@@ -880,53 +862,79 @@ describe.skip('pg — TODO retire-session-names #175: rewrite fixtures for UUID
880862
};
881863
}
882864

883-
test('master agent: dir:<name> with current_executor.claude_session_id resolves via chokepoint', async () => {
865+
test('master agent (dir:<name> id): resolves via chokepoint when worker row carries the id', async () => {
866+
const registry = await import('./agent-registry.js');
884867
const executorReg = await import('./executor-registry.js');
885868
const { resolveResumeSessionId } = await import('./protocol-router.js');
886869

887870
const sessionId = 'fa1fac7b-1234-4abc-9def-000000000001';
888-
await seedMasterDirRow('master-db', { team: 'team-db', repoPath: tempDir });
871+
const now = new Date().toISOString();
872+
// Master persistence id-shape (`dir:<name>`) is preserved. The wish #175
873+
// change is that the caller must pass a worker row with this id —
874+
// resolveResumeSessionId no longer fabricates `dir:${recipientId}` from
875+
// a name when worker is null.
876+
await registry.register({
877+
id: 'dir:master-db',
878+
paneId: '%51',
879+
session: 'test-session',
880+
provider: 'claude',
881+
transport: 'tmux',
882+
role: 'master-db',
883+
team: 'team-db',
884+
customName: 'master-db',
885+
state: 'idle',
886+
startedAt: now,
887+
lastStateChange: now,
888+
repoPath: tempDir,
889+
worktree: null,
890+
autoResume: true,
891+
});
889892
await executorReg.createAndLinkExecutor('dir:master-db', 'claude', 'tmux', {
890893
claudeSessionId: sessionId,
891894
state: 'idle',
892895
});
893896

894-
const result = await resolveResumeSessionId(null, templateFor('master-db', 'team-db'), 'master-db');
897+
const worker = await registry.get('dir:master-db');
898+
expect(worker).toBeTruthy();
899+
const result = await resolveResumeSessionId(worker!, templateFor('master-db', 'team-db'));
895900
expect(result).toBe(sessionId);
896901
});
897902

898-
test('master agent jsonl-fallback: no executor on row, jsonl on disk resolves via chokepoint', async () => {
903+
test('null worker is rejected at the contract boundary (G6 tightened)', async () => {
899904
const { resolveResumeSessionId } = await import('./protocol-router.js');
900-
const executorReg = await import('./executor-registry.js');
901-
902-
const recoveredSessionId = 'fa1fac7b-1234-4abc-9def-000000000002';
903-
await seedMasterDirRow('master-jsonl', { team: 'team-jsonl', repoPath: tempDir });
904-
// No executor → DB happy path misses; getResumeSessionId falls through
905-
// to the JSONL scanner. Override scanner to return our recovered UUID
906-
// without touching the real ~/.claude/projects/* tree.
907-
executorReg._resumeJsonlScannerDeps.scanForSession = async (cwd, identity) => {
908-
if (cwd === tempDir && identity?.team === 'team-jsonl' && identity?.customName === 'master-jsonl') {
909-
return recoveredSessionId;
910-
}
911-
return null;
912-
};
913-
914-
try {
915-
const result = await resolveResumeSessionId(null, templateFor('master-jsonl', 'team-jsonl'), 'master-jsonl');
916-
expect(result).toBe(recoveredSessionId);
917-
} finally {
918-
executorReg._resumeJsonlScannerDeps.scanForSession = null;
919-
}
905+
// After wish #175 G6, callers must resolve to an id before invoking this
906+
// helper. Passing null violates the contract — type-system enforces this
907+
// at compile time, but we guard at runtime to catch any erased-type
908+
// callers from JS.
909+
await expect(
910+
// @ts-expect-error — passing null intentionally violates the new contract
911+
resolveResumeSessionId(null, templateFor('eph', 'team-eph')),
912+
).rejects.toThrow();
920913
});
921914

922-
test('ephemeral spawn: no dir:<name> row, no worker → undefined (fresh --session-id)', async () => {
915+
test('non-claude provider returns undefined regardless of worker state', async () => {
916+
const registry = await import('./agent-registry.js');
923917
const { resolveResumeSessionId } = await import('./protocol-router.js');
924918

925-
const result = await resolveResumeSessionId(
926-
null,
927-
templateFor('ephemeral-role', 'team-eph'),
928-
'unknown-ephemeral-task',
929-
);
919+
const now = new Date().toISOString();
920+
await registry.register({
921+
id: '99999999-2222-3333-4444-555555555555',
922+
paneId: '%52',
923+
session: 'test-session',
924+
provider: 'codex',
925+
transport: 'tmux',
926+
role: 'codex-role',
927+
team: 'team-codex',
928+
state: 'idle',
929+
startedAt: now,
930+
lastStateChange: now,
931+
repoPath: tempDir,
932+
worktree: null,
933+
});
934+
const worker = await registry.get('99999999-2222-3333-4444-555555555555');
935+
expect(worker).toBeTruthy();
936+
const tpl: WorkerTemplate = { ...templateFor('codex-role', 'team-codex'), provider: 'codex' };
937+
const result = await resolveResumeSessionId(worker!, tpl);
930938
expect(result).toBeUndefined();
931939
});
932940

@@ -938,7 +946,7 @@ describe.skip('pg — TODO retire-session-names #175: rewrite fixtures for UUID
938946
const sessionId = 'fa1fac7b-1234-4abc-9def-000000000003';
939947
const now = new Date().toISOString();
940948
await registry.register({
941-
id: 'live-master-worker',
949+
id: '88888888-2222-3333-4444-555555555555',
942950
paneId: '%50',
943951
session: 'test-session',
944952
provider: 'claude',
@@ -953,15 +961,53 @@ describe.skip('pg — TODO retire-session-names #175: rewrite fixtures for UUID
953961
worktree: null,
954962
autoResume: true,
955963
});
956-
await executorReg.createAndLinkExecutor('live-master-worker', 'claude', 'tmux', {
964+
await executorReg.createAndLinkExecutor('88888888-2222-3333-4444-555555555555', 'claude', 'tmux', {
957965
claudeSessionId: sessionId,
958966
state: 'idle',
959967
});
960968

961-
const worker = await registry.get('live-master-worker');
969+
const worker = await registry.get('88888888-2222-3333-4444-555555555555');
962970
expect(worker).toBeTruthy();
963-
const result = await resolveResumeSessionId(worker!, templateFor('master-live', 'team-live'), 'master-live');
971+
const result = await resolveResumeSessionId(worker!, templateFor('master-live', 'team-live'));
964972
expect(result).toBe(sessionId);
965973
});
966974
});
975+
976+
// ---------------------------------------------------------------------------
977+
// findSpawnTemplate / cleanupDeadWorkers identity-only contracts (wish #175 G6)
978+
// These helpers are not exported — we exercise them via send-path behavior:
979+
// - findSpawnTemplate: a recipient-as-role that only matches a template by
980+
// its `role` (no worker carrying that role) must NOT auto-spawn anymore.
981+
// - cleanupDeadWorkers: only removes the row whose id matches; sibling
982+
// dead rows with the same role survive.
983+
// ---------------------------------------------------------------------------
984+
985+
describe('identity-only spawn/cleanup contracts (G6)', () => {
986+
test('findSpawnTemplate refuses to match by recipient name when no worker exists', async () => {
987+
const registry = await import('./agent-registry.js');
988+
const router = await import('./protocol-router.js');
989+
990+
router._deps.isPaneAlive = async (paneId: string) => alivePanes.has(paneId);
991+
router._deps.waitForWorkerReady = async () => true;
992+
process.env.TMUX = '/tmp/tmux-test/default,123,0';
993+
994+
const now = new Date().toISOString();
995+
// Template exists keyed by role 'engineer' / team 'g6-team'.
996+
await registry.saveTemplate({
997+
id: 'g6-team-engineer',
998+
team: 'g6-team',
999+
role: 'engineer',
1000+
provider: 'claude',
1001+
cwd: tempDir,
1002+
lastSpawnedAt: now,
1003+
});
1004+
1005+
// No worker row, no directory entry. Pre-G6 router would have matched
1006+
// the template by recipientId ('engineer') and spawned. Post-G6: refuses.
1007+
const spawnCountBefore = spawnCallCount;
1008+
const result = await router.sendMessage(tempDir, 'alice', 'engineer', 'hi', 'g6-team');
1009+
expect(spawnCallCount).toBe(spawnCountBefore);
1010+
expect(result.delivered).toBe(false);
1011+
});
1012+
});
9671013
});

src/lib/protocol-router.ts

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -236,21 +236,19 @@ async function findLiveWorkerFuzzy(recipientId: string): Promise<registry.Agent
236236
* Ensure a worker is alive, auto-spawning from template if needed.
237237
* Handles suspended workers by resuming with --resume <session-id>.
238238
*/
239-
/** Find a matching spawn template for the worker/recipient. */
240-
async function findSpawnTemplate(
241-
worker: registry.Agent | null,
242-
recipientId: string,
243-
): Promise<registry.WorkerTemplate | null> {
239+
/**
240+
* Find a matching spawn template for a known worker.
241+
*
242+
* Identity-only match: requires a worker row carrying `(team, role)` —
243+
* recipientId-as-role fuzz removed (wish #175 G6, decision row 2).
244+
* Caller resolves name → id at the CLI boundary; if no worker row exists
245+
* for the canonical id, return null and let the send path surface
246+
* "unknown agent" rather than guessing.
247+
*/
248+
async function findSpawnTemplate(worker: registry.Agent | null): Promise<registry.WorkerTemplate | null> {
249+
if (!worker || !worker.team || !worker.role) return null;
244250
const templates = await registry.listTemplates();
245-
const candidates = [worker?.role, worker?.id, recipientId].filter((v): v is string => Boolean(v));
246-
const uniqueCandidates = [...new Set(candidates)];
247-
const workerTeam = worker?.team;
248-
return (
249-
templates.find((t) => {
250-
if (workerTeam && t.team !== workerTeam) return false;
251-
return uniqueCandidates.some((q) => t.id === q || t.role === q || `${t.team}:${t.role}` === q);
252-
}) ?? null
253-
);
251+
return templates.find((t) => t.team === worker.team && t.role === worker.role) ?? null;
254252
}
255253

256254
/** Attempt to spawn a worker from template inside an advisory-locked transaction. */
@@ -261,7 +259,6 @@ async function lockedSpawnWorker(
261259
resumeSessionId: string | undefined,
262260
): Promise<{ worker: registry.Agent; respawned: boolean } | null> {
263261
const sql = await getConnection();
264-
const workerTeam = worker?.team;
265262

266263
const lockResult = await sql.begin(async (tx: typeof sql) => {
267264
await tx`SELECT pg_advisory_xact_lock(hashtext(${recipientId}))`;
@@ -270,8 +267,10 @@ async function lockedSpawnWorker(
270267
const postLockLive = await findLiveWorkerFuzzy(recipientId);
271268
if (postLockLive) return { type: 'existing' as const, worker: postLockLive };
272269

273-
await cleanupDeadWorkers(recipientId, workerTeam);
274-
if (worker) await registry.unregister(worker.id);
270+
if (worker) {
271+
await cleanupDeadWorkers(worker.id);
272+
await registry.unregister(worker.id);
273+
}
275274

276275
const { spawnWorkerFromTemplate } = await import('./protocol-router-spawn.js');
277276
const spawnResult = await spawnWorkerFromTemplate(template, resumeSessionId);
@@ -298,32 +297,28 @@ async function lockedSpawnWorker(
298297
* whose last executor is in a non-terminal state (spawning/running/idle/
299298
* working/permission/question) are mid-task — we MUST resume them with
300299
* their session id. Silently spawning fresh would drop the conversation
301-
* history. Master agents (`kind='permanent'`, `dir:<name>` rows) lose
302-
* their runtime worker on reboot but retain a recoverable session UUID
303-
* via the chokepoint; probing `dir:<recipientId>` when no live worker
304-
* exists keeps team-lead "hires" on the master's persistent session
305-
* instead of forking a fresh UUID and orphaning conversation history.
306-
* Ephemeral spawns have no `dir:<name>` row, so the chokepoint returns
307-
* `unknown_agent` and the caller proceeds with a fresh `--session-id`.
308-
* Gap C from trace-stale-resume (task #6) + master-aware-spawn Group 1.
300+
* history. Identity-only contract (wish #175 G6): caller must supply a
301+
* non-null worker with a canonical id (UUID or `dir:<name>`); the
302+
* `dir:${recipientId}` fallback for null workers is removed because
303+
* name-as-id resolution belongs at the CLI boundary, not here.
304+
* Gap C from trace-stale-resume (task #6).
309305
*/
310306
export async function resolveResumeSessionId(
311-
worker: registry.Agent | null,
307+
worker: registry.Agent,
312308
template: registry.WorkerTemplate,
313-
recipientId: string,
314309
): Promise<string | undefined> {
315310
if (template.provider !== 'claude') return undefined;
316-
const agentIdToProbe = worker?.id ?? `dir:${recipientId}`;
317-
const decision = await shouldResume(agentIdToProbe);
318-
if (worker && (await isExecutorResumable(worker))) {
319-
if (!decision.sessionId) throw new MissingResumeSessionError(worker.id, recipientId);
311+
if (!worker.id) throw new Error('resolveResumeSessionId: worker.id is required');
312+
const decision = await shouldResume(worker.id);
313+
if (await isExecutorResumable(worker)) {
314+
if (!decision.sessionId) throw new MissingResumeSessionError(worker.id);
320315
}
321316
if (!decision.sessionId) return undefined;
322317
// Actual resume attempt — emit the lifecycle event via the eventful
323318
// helper. `shouldResume` (read path) stays silent
324319
// (observability-signal-normalization Group 1).
325320
const { acquireResumeSessionForAttempt } = await import('./executor-registry.js');
326-
const acquired = await acquireResumeSessionForAttempt(agentIdToProbe).catch(() => null);
321+
const acquired = await acquireResumeSessionForAttempt(worker.id).catch(() => null);
327322
return acquired ?? decision.sessionId;
328323
}
329324

@@ -351,10 +346,15 @@ async function ensureWorkerAlive(
351346
if (await isExecutorCompleted(worker)) return null;
352347
if (!process.env.TMUX) return null;
353348

354-
const template = await findSpawnTemplate(worker, recipientId);
349+
// Identity-only auto-spawn (wish #175 G6): we need a worker row with
350+
// (team, role) to locate a template. Directory-only fallback (no worker
351+
// row) is intentionally not supported here — caller resolves at CLI.
352+
if (!worker) return null;
353+
354+
const template = await findSpawnTemplate(worker);
355355
if (!template) return null;
356356

357-
const resumeSessionId = await resolveResumeSessionId(worker, template, recipientId);
357+
const resumeSessionId = await resolveResumeSessionId(worker, template);
358358

359359
try {
360360
return await lockedSpawnWorker(recipientId, worker, template, resumeSessionId);
@@ -364,18 +364,17 @@ async function ensureWorkerAlive(
364364
}
365365

366366
/**
367-
* Remove dead worker entries matching a role/ID to prevent ghost accumulation.
368-
* Only removes workers whose tmux panes are no longer alive.
367+
* Remove the dead worker row identified by `agentId` to prevent ghost
368+
* accumulation. Only removes the row when its tmux pane is no longer alive.
369+
*
370+
* Identity-only contract (wish #175 G6): match by canonical id only —
371+
* role/team fuzz removed. Caller resolves at the CLI boundary.
369372
*/
370-
async function cleanupDeadWorkers(recipientId: string, team?: string): Promise<void> {
371-
const allWorkers = await registry.list();
372-
for (const w of allWorkers) {
373-
if (team && w.team !== team) continue;
374-
const matches = w.role === recipientId || w.id === recipientId;
375-
if (!matches) continue;
376-
if (await _deps.isPaneAlive(w.paneId)) continue;
377-
await registry.unregister(w.id);
378-
}
373+
async function cleanupDeadWorkers(agentId: string): Promise<void> {
374+
const w = await registry.get(agentId);
375+
if (!w) return;
376+
if (await _deps.isPaneAlive(w.paneId)) return;
377+
await registry.unregister(w.id);
379378
}
380379

381380
// ============================================================================

0 commit comments

Comments
 (0)