Skip to content

Commit 838c451

Browse files
author
Carsten
committed
fix(05.1-01): defect qwibitai#6 L2 — synthetic user-directive injection in onHuman (breaks AMD context contamination)
Layer-2 defense-in-depth for defect qwibitai#6: between updateInstructions(persona) and setTimeout→requestResponse in the webhook.ts onHuman closure, inject a conversation.item.create with role=user containing a bracketed system-hint directive. This breaks the conversational context the model inherited from CASE2_AMD_CLASSIFIER_PROMPT so it cannot mis-read the callee's opening greeting ('Restaurant Bellavista') as evidence it should continue in AMD-helper mode.

Text verbatim per RESEARCH §2.5 with ASCII umlauts (Phase 2 CASE6B_PERSONA convention): 'Beginne bitte mit der Begruessung gemaess deiner neuen Anweisungen'. Hardcoded literal, not derived from counterpart input — safe under T-05.1-01-04 (counterpart cannot prompt-inject).

Pitfall 5: conversation.item.create does NOT itself trigger a response.create (VAD only scopes audio-derived items). The explicit setTimeout→requestResponse is preserved unchanged.

Tests added (RED before, GREEN after this commit):
- Test F: asserts send order session.update → conversation.item.create → (after GREET_TRIGGER_DELAY_OUTBOUND_MS) response.create
- Test G: asserts verbatim directive text with ASCII umlauts, no unicode
- Test H (regression, inside F): persona-swap trigger from Wave 3 still fires

Also adds dispatch.getAmdClassifier() test-only accessor so the tests can drive classifier.onAmdResult('human') end-to-end through the /accept flow.

Full voice-bridge suite: 367 passed / 4 skipped. Build clean. Both defect qwibitai#6 layers now shipped (L1 session.type discriminator, L2 synthetic directive).
1 parent 8012c06 commit 838c451

3 files changed

Lines changed: 279 additions & 0 deletions

File tree

voice-bridge/src/tools/dispatch.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ let _activeClassifier: AmdClassifier | null = null
3636
/**
 * Replaces the module-level active AMD classifier.
 *
 * @param classifier - Classifier to store, or `null` to clear the current one.
 */
export function setAmdClassifier(classifier: AmdClassifier | null): void {
  _activeClassifier = classifier
}
39+
/**
 * Test-only accessor for the active AMD classifier (Plan 05.1-01 Task 3).
 *
 * Lets accept.test.ts drive the classifier's onAmdResult('human') synthetic
 * trigger so the onHuman closure's full send-ordering can be asserted
 * end-to-end.
 *
 * @returns The current value of the module-level `_activeClassifier`, or
 *   `null` when none is registered.
 */
export function getAmdClassifier(): AmdClassifier | null {
  return _activeClassifier
}
3945

4046
// Tool-name mapping: bridge tool name → Core MCP tool name.
4147
// null = not implemented (03-08 skipped or bridge-internal, stub path).

voice-bridge/src/webhook.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,46 @@ export function registerAcceptRoute(
294294
if (ctxRef) {
295295
// Push Case-2 persona to model via session.update, then trigger greeting.
296296
updateInstructions(ctxRef.sideband.state, persona, log)
297+
298+
// Plan 05.1-01 Task 3 (defect #6 Layer 2, RESEARCH §2.5):
299+
// synthetic user-directive injection between updateInstructions
300+
// and the setTimeout→requestResponse. Breaks the conversational
301+
// context inherited from CASE2_AMD_CLASSIFIER_PROMPT — without
302+
// this, the model may still mis-read the callee's opening
303+
// greeting ("Restaurant Bellavista") as evidence it should
304+
// continue in AMD-helper mode instead of CASE2_OUTBOUND_PERSONA.
305+
// Text uses ASCII umlauts per Phase 2 CASE6B_PERSONA convention.
306+
// Pitfall 5: this item.create does NOT itself trigger a
307+
// response.create (VAD only scopes audio-derived items), so
308+
// the explicit requestResponse below is still required.
309+
try {
  // Resolve the socket once so the "sent" event is only logged when a frame
  // was actually handed to send(). With `ws?.send(...)` a missing socket
  // short-circuited silently and the success event was still emitted.
  const ws = ctxRef.sideband.state.ws
  if (ws) {
    ws.send(
      JSON.stringify({
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: 'user',
          content: [
            {
              type: 'input_text',
              text: '[System-Hinweis: AMD-Verdict war human. Der Anruf laeuft jetzt im Reservierungs-Modus. Beginne bitte mit der Begruessung gemaess deiner neuen Anweisungen.]',
            },
          ],
        },
      }),
    )
    log.info({
      event: 'case_2_amd_synthetic_user_directive_sent',
      call_id: callId,
    })
  } else {
    // No socket to write to: report it instead of claiming success.
    log.warn({
      event: 'case_2_amd_synthetic_user_directive_send_failed',
      call_id: callId,
      err: 'sideband websocket not available',
    })
  }
} catch (e: unknown) {
  // Best-effort: a failed directive send is logged and swallowed so the
  // greeting trigger scheduled right after this statement still runs.
  log.warn({
    event: 'case_2_amd_synthetic_user_directive_send_failed',
    call_id: callId,
    // Narrow instead of casting so non-Error throws still yield a message.
    err: e instanceof Error ? e.message : String(e),
  })
}
336+
297337
setTimeout(() => {
298338
if (ctxRef) requestResponse(ctxRef.sideband.state, log)
299339
}, GREET_TRIGGER_DELAY_OUTBOUND_MS)

voice-bridge/tests/accept.test.ts

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,239 @@ describe('POST /accept — Case-2 outbound branch (05-03 Task 3)', () => {
580580
await app.close()
581581
}
582582
})
583+
584+
// Plan 05.1-01 Task 3: onHuman L2 defense-in-depth — synthetic user-directive
// injection between updateInstructions and setTimeout→requestResponse.
// Breaks AMD classifier conversational context contamination (RESEARCH §2.5).
// Asserts exact WS send order: session.update → conversation.item.create →
// response.create (after GREET_TRIGGER_DELAY_OUTBOUND_MS), and that
// response.create is NOT sent before the timer has elapsed.
it('Test F+H: onHuman sends session.update THEN conversation.item.create THEN (after timer) response.create', async () => {
  // Mock WS that records every frame passed to .send(), in call order.
  const sentMessages: string[] = []
  const mockWs = {
    send: vi.fn((s: string) => {
      sentMessages.push(s)
    }),
    readyState: 1,
  }
  // Decodes the `type` field of every recorded frame, preserving send order.
  const sentTypes = (): unknown[] =>
    sentMessages.map((raw) => (JSON.parse(raw) as { type?: unknown }).type)

  // Mock sideband state: ready=true so updateInstructions and requestResponse proceed
  const mockState = {
    callId: 'rtc_c2_l2',
    ready: true,
    ws: mockWs as unknown as import('ws').WebSocket,
    openedAt: 0,
    lastUpdateAt: 0,
  }

  const outboundRouter = makeCase2OutboundRouter('case_2')
  // NOTE(review): presumably gives the outbound router time to settle before
  // the request is injected — confirm against makeCase2OutboundRouter.
  await new Promise((r) => setTimeout(r, 10))

  const acceptSpy = vi.fn().mockResolvedValue({})
  const openai = {
    webhooks: {
      unwrap: vi.fn().mockResolvedValue({
        type: 'realtime.call.incoming',
        data: {
          call_id: 'rtc_c2_l2',
          sip_headers: [{ name: 'From', value: '"Caller" <sip:+4900000@sipgate.de>' }],
        },
      }),
    },
    realtime: { calls: { accept: acceptSpy, reject: vi.fn() } },
  }

  const router = {
    startCall: vi.fn().mockReturnValue({
      sideband: { state: mockState },
      close: vi.fn(),
    }),
    endCall: vi.fn(),
    getCall: vi.fn(),
    _size: vi.fn().mockReturnValue(0),
  }

  const { buildApp } = await import('../src/index.js')
  const { getAmdClassifier, setAmdClassifier } = await import('../src/tools/dispatch.js')

  const app = await buildApp({
    openaiOverride: openai as never,
    whitelistOverride: new Set(),
    routerOverride: router as never,
    outboundRouterOverride: outboundRouter,
  })

  try {
    const res = await app.inject({
      method: 'POST',
      url: '/accept',
      headers: {
        'content-type': 'application/json',
        'webhook-id': 'c2-l2',
        'webhook-timestamp': String(Math.floor(Date.now() / 1000)),
        'webhook-signature': 'v1,xxx',
      },
      payload: JSON.stringify({
        type: 'realtime.call.incoming',
        data: { call_id: 'rtc_c2_l2' },
      }),
    })

    expect(res.statusCode).toBe(200)

    // Switch to fake timers BEFORE firing onAmdResult so the setTimeout in
    // onHuman (GREET_TRIGGER_DELAY_OUTBOUND_MS) is trapped under our control.
    vi.useFakeTimers()
    try {
      const classifier = getAmdClassifier()
      expect(classifier).not.toBeNull()
      // Trigger the human verdict → fires the onHuman closure in webhook.ts
      classifier?.onAmdResult('human')

      // IMMEDIATELY after onAmdResult: two sync sends must be present
      // (updateInstructions then conversation.item.create).
      expect(sentMessages.length).toBeGreaterThanOrEqual(2)
      // requestResponse must still be pending in the setTimeout queue. Without
      // this check a synchronous response.create would pass the ordering
      // assertions below unnoticed.
      expect(sentTypes()).not.toContain('response.create')

      // Test F ordering: first send = session.update with type:'realtime' + Case-2 persona
      const firstParsed = JSON.parse(sentMessages[0])
      expect(firstParsed.type).toBe('session.update')
      expect(firstParsed.session?.type).toBe('realtime')
      expect(firstParsed.session?.instructions).toContain('NanoClaw im Auftrag')

      // Test F ordering: second send = conversation.item.create role=user synthetic directive
      const secondParsed = JSON.parse(sentMessages[1])
      expect(secondParsed.type).toBe('conversation.item.create')
      expect(secondParsed.item?.type).toBe('message')
      expect(secondParsed.item?.role).toBe('user')
      expect(secondParsed.item?.content?.[0]?.type).toBe('input_text')
      expect(secondParsed.item?.content?.[0]?.text).toContain(
        '[System-Hinweis: AMD-Verdict war human.',
      )

      // Test H (regression): the persona-swap trigger from Wave 3 still fires —
      // advance timers past GREET_TRIGGER_DELAY_OUTBOUND_MS, expect response.create.
      // NOTE(review): 5000 assumes the delay constant is below 5 s — confirm.
      await vi.advanceTimersByTimeAsync(5000)
      const typesAfterTimer = sentTypes()
      expect(typesAfterTimer).toContain('response.create')

      // Overall ordering: session.update (idx 0) < item.create (idx 1) < response.create (later)
      const idxSessionUpdate = typesAfterTimer.indexOf('session.update')
      const idxItemCreate = typesAfterTimer.indexOf('conversation.item.create')
      const idxResponseCreate = typesAfterTimer.indexOf('response.create')
      expect(idxSessionUpdate).toBe(0)
      expect(idxItemCreate).toBe(1)
      expect(idxResponseCreate).toBeGreaterThan(idxItemCreate)
    } finally {
      vi.useRealTimers()
      // Clean up classifier registration to avoid cross-test contamination
      setAmdClassifier(null)
    }
  } finally {
    await app.close()
  }
})
726+
727+
// Test G: pins the verbatim text of the synthetic user directive that onHuman
// injects via conversation.item.create (RESEARCH §2.5), including the
// project's ASCII-umlaut convention.
it('Test G: synthetic-item text contains verbatim directive (RESEARCH §2.5, ASCII umlauts)', async () => {
  // Mock WS that records every frame passed to .send(), in call order.
  const sentMessages: string[] = []
  const mockWs = {
    send: vi.fn((s: string) => {
      sentMessages.push(s)
    }),
    readyState: 1,
  }
  const mockState = {
    callId: 'rtc_c2_l2g',
    ready: true,
    ws: mockWs as unknown as import('ws').WebSocket,
    openedAt: 0,
    lastUpdateAt: 0,
  }

  const outboundRouter = makeCase2OutboundRouter('case_2')
  // NOTE(review): presumably gives the outbound router time to settle before
  // the request is injected — confirm against makeCase2OutboundRouter.
  await new Promise((r) => setTimeout(r, 10))

  const acceptSpy = vi.fn().mockResolvedValue({})
  const openai = {
    webhooks: {
      unwrap: vi.fn().mockResolvedValue({
        type: 'realtime.call.incoming',
        data: {
          call_id: 'rtc_c2_l2g',
          sip_headers: [{ name: 'From', value: '"Caller" <sip:+4900000@sipgate.de>' }],
        },
      }),
    },
    realtime: { calls: { accept: acceptSpy, reject: vi.fn() } },
  }

  const router = {
    startCall: vi.fn().mockReturnValue({
      sideband: { state: mockState },
      close: vi.fn(),
    }),
    endCall: vi.fn(),
    getCall: vi.fn(),
    _size: vi.fn().mockReturnValue(0),
  }

  const { buildApp } = await import('../src/index.js')
  const { getAmdClassifier, setAmdClassifier } = await import('../src/tools/dispatch.js')

  const app = await buildApp({
    openaiOverride: openai as never,
    whitelistOverride: new Set(),
    routerOverride: router as never,
    outboundRouterOverride: outboundRouter,
  })

  try {
    await app.inject({
      method: 'POST',
      url: '/accept',
      headers: {
        'content-type': 'application/json',
        'webhook-id': 'c2-l2g',
        'webhook-timestamp': String(Math.floor(Date.now() / 1000)),
        'webhook-signature': 'v1,xxx',
      },
      payload: JSON.stringify({
        type: 'realtime.call.incoming',
        data: { call_id: 'rtc_c2_l2g' },
      }),
    })

    const classifier = getAmdClassifier()
    // Fail here, with a clear cause, if /accept did not register a classifier.
    // Otherwise the optional call below would no-op and the test would fail
    // later on a missing conversation.item.create frame.
    expect(classifier).not.toBeNull()
    // NOTE(review): runs under real timers, so the greeting setTimeout that
    // onHuman schedules may fire after this test ends — confirm harmless.
    classifier?.onAmdResult('human')

    const itemCreate = sentMessages
      .map((s) => JSON.parse(s))
      .find((p) => p.type === 'conversation.item.create')
    expect(itemCreate).toBeDefined()
    const text = itemCreate.item.content[0].text as string
    // Verbatim phrases per RESEARCH §2.5 + ASCII umlaut convention
    expect(text).toContain('[System-Hinweis: AMD-Verdict war human.')
    expect(text).toContain('Reservierungs-Modus')
    expect(text).toContain('Beginne bitte mit der Begruessung gemaess deiner neuen Anweisungen')
    // ASCII umlauts, not unicode — project convention (Phase 2 CASE6B_PERSONA).
    // Upper-case forms are covered too, so a capitalised umlaut cannot slip through.
    expect(text).not.toMatch(/[äöüßÄÖÜẞ]/)
  } finally {
    // Always clear the classifier registration, even when an assertion above
    // failed, to avoid cross-test contamination.
    setAmdClassifier(null)
    await app.close()
  }
})
583816
})
584817

585818
// Plan 04-02 Task 3: /accept-time cost gate integration.

0 commit comments

Comments
 (0)