Skip to content

Commit 6b4ef85

Browse files
committed
fix(api): avoid duplicated responses history
1 parent 5d3be89 commit 6b4ef85

2 files changed

Lines changed: 282 additions & 19 deletions

File tree

gateway/platforms/api_server.py

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,12 +1693,12 @@ async def _dispatch(it) -> None:
16931693
"output_tokens": usage.get("output_tokens", 0),
16941694
"total_tokens": usage.get("total_tokens", 0),
16951695
}
1696-
full_history = list(conversation_history)
1697-
full_history.append({"role": "user", "content": user_message})
1698-
if isinstance(result, dict) and result.get("messages"):
1699-
full_history.extend(result["messages"])
1700-
else:
1701-
full_history.append({"role": "assistant", "content": final_response_text})
1696+
full_history = self._build_response_conversation_history(
1697+
conversation_history,
1698+
user_message,
1699+
result,
1700+
final_response_text,
1701+
)
17021702
_persist_response_snapshot(
17031703
completed_env,
17041704
conversation_history_snapshot=full_history,
@@ -1965,17 +1965,22 @@ async def _compute_response():
19651965

19661966
# Build the full conversation history for storage
19671967
# (includes tool calls from the agent run)
1968-
full_history = list(conversation_history)
1969-
full_history.append({"role": "user", "content": user_message})
1970-
# Add agent's internal messages if available
1971-
agent_messages = result.get("messages", [])
1972-
if agent_messages:
1973-
full_history.extend(agent_messages)
1974-
else:
1975-
full_history.append({"role": "assistant", "content": final_response})
1968+
full_history = self._build_response_conversation_history(
1969+
conversation_history,
1970+
user_message,
1971+
result,
1972+
final_response,
1973+
)
19761974

1977-
# Build output items (includes tool calls + final message)
1978-
output_items = self._extract_output_items(result)
1975+
# Build output items from the current turn only. AIAgent returns a
1976+
# full transcript in result["messages"], while older/mocked paths may
1977+
# return only the current turn suffix.
1978+
output_start_index = self._response_messages_turn_start_index(
1979+
conversation_history,
1980+
user_message,
1981+
result,
1982+
)
1983+
output_items = self._extract_output_items(result, start_index=output_start_index)
19791984

19801985
response_data = {
19811986
"id": response_id,
@@ -2264,17 +2269,70 @@ async def _handle_run_job(self, request: "web.Request") -> "web.Response":
22642269
# ------------------------------------------------------------------
22652270

22662271
@staticmethod
2267-
def _extract_output_items(result: Dict[str, Any]) -> List[Dict[str, Any]]:
2272+
def _build_response_conversation_history(
2273+
conversation_history: List[Dict[str, Any]],
2274+
user_message: Any,
2275+
result: Dict[str, Any],
2276+
final_response: Any,
2277+
) -> List[Dict[str, Any]]:
2278+
"""Build the stored Responses transcript without duplicating history."""
2279+
prior = list(conversation_history)
2280+
current_user = {"role": "user", "content": user_message}
2281+
agent_messages = result.get("messages") if isinstance(result, dict) else None
2282+
2283+
if isinstance(agent_messages, list) and agent_messages:
2284+
turn_start = APIServerAdapter._response_messages_turn_start_index(
2285+
conversation_history,
2286+
user_message,
2287+
result,
2288+
)
2289+
if turn_start:
2290+
return list(agent_messages)
2291+
2292+
full_history = prior
2293+
full_history.append(current_user)
2294+
full_history.extend(agent_messages)
2295+
return full_history
2296+
2297+
full_history = prior
2298+
full_history.append(current_user)
2299+
full_history.append({"role": "assistant", "content": final_response})
2300+
return full_history
2301+
2302+
@staticmethod
2303+
def _response_messages_turn_start_index(
2304+
conversation_history: List[Dict[str, Any]],
2305+
user_message: Any,
2306+
result: Dict[str, Any],
2307+
) -> int:
2308+
"""Detect transcript-shaped result["messages"] and return turn start."""
2309+
agent_messages = result.get("messages") if isinstance(result, dict) else None
2310+
if not isinstance(agent_messages, list) or not agent_messages:
2311+
return 0
2312+
2313+
prior = list(conversation_history)
2314+
current_user = {"role": "user", "content": user_message}
2315+
expected_prefix = prior + [current_user]
2316+
if agent_messages[:len(expected_prefix)] == expected_prefix:
2317+
return len(expected_prefix)
2318+
if prior and agent_messages[:len(prior)] == prior:
2319+
return len(prior)
2320+
return 0
2321+
2322+
@staticmethod
2323+
def _extract_output_items(result: Dict[str, Any], start_index: int = 0) -> List[Dict[str, Any]]:
22682324
"""
2269-
Build the full output item array from the agent's messages.
2325+
Build the output item array from the agent's messages.
22702326
2271-
Walks *result["messages"]* and emits:
2327+
Walks *result["messages"]* starting at *start_index* and emits:
22722328
- ``function_call`` items for each tool_call on assistant messages
22732329
- ``function_call_output`` items for each tool-role message
22742330
- a final ``message`` item with the assistant's text reply
22752331
"""
22762332
items: List[Dict[str, Any]] = []
22772333
messages = result.get("messages", [])
2334+
if start_index > 0:
2335+
messages = messages[start_index:]
22782336

22792337
for msg in messages:
22802338
role = msg.get("role")

tests/gateway/test_api_server.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,146 @@ async def test_previous_response_id_chaining(self, adapter):
13131313
assert len(call_kwargs["conversation_history"]) > 0
13141314
assert call_kwargs["user_message"] == "Now add 1 more"
13151315

1316+
@pytest.mark.asyncio
1317+
async def test_previous_response_id_stores_full_agent_transcript_once(self, adapter):
1318+
"""Chained Responses storage must not append result["messages"] twice."""
1319+
first_history = [
1320+
{"role": "user", "content": "What is 1+1?"},
1321+
{"role": "assistant", "content": "2"},
1322+
]
1323+
1324+
app = _create_app(adapter)
1325+
async with TestClient(TestServer(app)) as cli:
1326+
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1327+
mock_run.return_value = (
1328+
{
1329+
"final_response": "2",
1330+
"messages": list(first_history),
1331+
"api_calls": 1,
1332+
},
1333+
{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
1334+
)
1335+
resp1 = await cli.post(
1336+
"/v1/responses",
1337+
json={"model": "hermes-agent", "input": "What is 1+1?"},
1338+
)
1339+
1340+
assert resp1.status == 200
1341+
resp1_data = await resp1.json()
1342+
stored_first = adapter._response_store.get(resp1_data["id"])
1343+
assert stored_first["conversation_history"] == first_history
1344+
1345+
second_history = first_history + [
1346+
{"role": "user", "content": "Now add 1 more"},
1347+
{"role": "assistant", "content": "3"},
1348+
]
1349+
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1350+
mock_run.return_value = (
1351+
{
1352+
"final_response": "3",
1353+
"messages": list(second_history),
1354+
"api_calls": 1,
1355+
},
1356+
{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
1357+
)
1358+
resp2 = await cli.post(
1359+
"/v1/responses",
1360+
json={
1361+
"model": "hermes-agent",
1362+
"input": "Now add 1 more",
1363+
"previous_response_id": resp1_data["id"],
1364+
},
1365+
)
1366+
1367+
assert resp2.status == 200
1368+
resp2_data = await resp2.json()
1369+
stored_second = adapter._response_store.get(resp2_data["id"])
1370+
stored_history = stored_second["conversation_history"]
1371+
assert stored_history == second_history
1372+
assert stored_history.count(first_history[0]) == 1
1373+
assert stored_history.count({"role": "user", "content": "Now add 1 more"}) == 1
1374+
1375+
@pytest.mark.asyncio
1376+
async def test_previous_response_id_outputs_only_current_turn_items(self, adapter):
1377+
"""Response output must not replay previous tool artifacts."""
1378+
prior_history = [
1379+
{"role": "user", "content": "Read old file"},
1380+
{
1381+
"role": "assistant",
1382+
"tool_calls": [
1383+
{
1384+
"id": "call_old",
1385+
"function": {
1386+
"name": "read_file",
1387+
"arguments": '{"path":"/tmp/old.txt"}',
1388+
},
1389+
}
1390+
],
1391+
},
1392+
{
1393+
"role": "tool",
1394+
"tool_call_id": "call_old",
1395+
"content": '{"content":"old"}',
1396+
},
1397+
{"role": "assistant", "content": "old"},
1398+
]
1399+
adapter._response_store.put(
1400+
"resp_prev",
1401+
{
1402+
"response": {"id": "resp_prev", "status": "completed"},
1403+
"conversation_history": list(prior_history),
1404+
"session_id": "api-test-session",
1405+
},
1406+
)
1407+
full_agent_transcript = prior_history + [
1408+
{"role": "user", "content": "Read new file"},
1409+
{
1410+
"role": "assistant",
1411+
"tool_calls": [
1412+
{
1413+
"id": "call_new",
1414+
"function": {
1415+
"name": "read_file",
1416+
"arguments": '{"path":"/tmp/new.txt"}',
1417+
},
1418+
}
1419+
],
1420+
},
1421+
{
1422+
"role": "tool",
1423+
"tool_call_id": "call_new",
1424+
"content": '{"content":"new"}',
1425+
},
1426+
{"role": "assistant", "content": "new"},
1427+
]
1428+
1429+
app = _create_app(adapter)
1430+
async with TestClient(TestServer(app)) as cli:
1431+
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
1432+
mock_run.return_value = (
1433+
{
1434+
"final_response": "new",
1435+
"messages": list(full_agent_transcript),
1436+
"api_calls": 1,
1437+
},
1438+
{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
1439+
)
1440+
resp = await cli.post(
1441+
"/v1/responses",
1442+
json={
1443+
"model": "hermes-agent",
1444+
"input": "Read new file",
1445+
"previous_response_id": "resp_prev",
1446+
},
1447+
)
1448+
assert resp.status == 200
1449+
data = await resp.json()
1450+
1451+
output_json = json.dumps(data["output"])
1452+
assert "call_new" in output_json
1453+
assert "call_old" not in output_json
1454+
assert "/tmp/old.txt" not in output_json
1455+
13161456
@pytest.mark.asyncio
13171457
async def test_previous_response_id_preserves_session(self, adapter):
13181458
"""Chained responses via previous_response_id reuse the same session_id."""
@@ -1580,6 +1720,71 @@ async def _mock_run_agent(**kwargs):
15801720
assert data["status"] == "completed"
15811721
assert data["output"][-1]["content"][0]["text"] == "Stored response"
15821722

1723+
@pytest.mark.asyncio
1724+
async def test_streamed_previous_response_id_stores_full_agent_transcript_once(self, adapter):
1725+
prior_history = [
1726+
{"role": "user", "content": "What is 1+1?"},
1727+
{"role": "assistant", "content": "2"},
1728+
]
1729+
adapter._response_store.put(
1730+
"resp_prev",
1731+
{
1732+
"response": {"id": "resp_prev", "status": "completed"},
1733+
"conversation_history": list(prior_history),
1734+
"session_id": "api-test-session",
1735+
},
1736+
)
1737+
1738+
expected_history = prior_history + [
1739+
{"role": "user", "content": "Now add 1 more"},
1740+
{"role": "assistant", "content": "3"},
1741+
]
1742+
1743+
app = _create_app(adapter)
1744+
async with TestClient(TestServer(app)) as cli:
1745+
async def _mock_run_agent(**kwargs):
1746+
cb = kwargs.get("stream_delta_callback")
1747+
if cb:
1748+
cb("3")
1749+
return (
1750+
{
1751+
"final_response": "3",
1752+
"messages": list(expected_history),
1753+
"api_calls": 1,
1754+
},
1755+
{"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
1756+
)
1757+
1758+
with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
1759+
resp = await cli.post(
1760+
"/v1/responses",
1761+
json={
1762+
"model": "hermes-agent",
1763+
"input": "Now add 1 more",
1764+
"previous_response_id": "resp_prev",
1765+
"stream": True,
1766+
},
1767+
)
1768+
body = await resp.text()
1769+
1770+
assert resp.status == 200
1771+
response_id = None
1772+
for line in body.splitlines():
1773+
if line.startswith("data: "):
1774+
try:
1775+
payload = json.loads(line[len("data: "):])
1776+
except json.JSONDecodeError:
1777+
continue
1778+
if payload.get("type") == "response.completed":
1779+
response_id = payload["response"]["id"]
1780+
break
1781+
1782+
assert response_id
1783+
stored_history = adapter._response_store.get(response_id)["conversation_history"]
1784+
assert stored_history == expected_history
1785+
assert stored_history.count(prior_history[0]) == 1
1786+
assert stored_history.count({"role": "user", "content": "Now add 1 more"}) == 1
1787+
15831788
@pytest.mark.asyncio
15841789
async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
15851790
"""Server-side asyncio.CancelledError (shutdown, request timeout) must

0 commit comments

Comments
 (0)