Skip to content

Commit b46e5b3

Browse files
committed
fix: resolve lazy session creation regressions (#18370 fallout)
Fix regressions introduced by PR #18370 (lazy session creation) and clean up their fallout:
1. _finalize_session() uses stale session_key after compression (#20001)
2. session_key not synced after auto-compression in run_conversation (#20001)
3. pending_title ValueError leaves title wedged forever (#19029)
4. Gateway silently swallows null responses when agent did work (#18765)
5. One-time cleanup for accumulated ghost compression continuations (#20001)

Changes:
- tui_gateway/server.py: _finalize_session() now uses agent.session_id (falls back to session_key when agent is None). Refactor _sync_session_key_after_compress() with clear_pending_title and restart_slash_worker policy flags. Call it post-run_conversation() to sync session_key after auto-compression. Add ValueError handler to pending_title flush.
- gateway/run.py: Extract _normalize_empty_agent_response() helper that consolidates failed/partial/null response handling. Surfaces a user-facing error when the agent did work (api_calls > 0) but returned no text.
- hermes_state.py: Add finalize_orphaned_compression_sessions() — marks ghost continuation sessions as ended (non-destructive, preserves data).
- cli.py: One-time startup migration for orphaned compression sessions.

Test changes:
- tests/test_tui_gateway_server.py: Update pending_title ValueError test for post-#18370 architecture (title applied post-message, not at create).
- tests/test_lazy_session_regressions.py: 14 new regression tests covering all fixed paths.
1 parent 87b113c commit b46e5b3

6 files changed

Lines changed: 778 additions & 74 deletions

File tree

cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,18 @@ def _run_state_db_auto_maintenance(session_db) -> None:
940940
except Exception as _prune_exc:
941941
logger.debug("Ghost session prune skipped: %s", _prune_exc)
942942

943+
# One-time finalize of orphaned compression continuations (#20001).
944+
try:
945+
if not session_db.get_meta("orphaned_compression_finalize_v1"):
946+
finalized = session_db.finalize_orphaned_compression_sessions()
947+
session_db.set_meta("orphaned_compression_finalize_v1", "1")
948+
if finalized:
949+
logger.info(
950+
"Finalized %d orphaned compression sessions", finalized
951+
)
952+
except Exception as _finalize_exc:
953+
logger.debug("Orphan compression finalize skipped: %s", _finalize_exc)
954+
943955
cfg = (_load_full_config().get("sessions") or {})
944956
if not cfg.get("auto_prune", False):
945957
return

gateway/run.py

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,52 @@ def _format_gateway_process_notification(evt: dict) -> "str | None":
939939
_gateway_runner_ref: _weakref.ref = lambda: None
940940

941941

942+
def _normalize_empty_agent_response(
943+
agent_result: dict,
944+
response: str,
945+
*,
946+
history_len: int = 0,
947+
) -> str:
948+
"""Normalize empty/None agent responses into user-facing messages.
949+
950+
Consolidates the existing ``failed`` handler and adds a catch-all for
951+
the case where the agent did work (api_calls > 0) but returned no text.
952+
Fix for #18765.
953+
"""
954+
if response:
955+
return response
956+
957+
if agent_result.get("failed"):
958+
error_detail = agent_result.get("error", "unknown error")
959+
error_str = str(error_detail).lower()
960+
is_context_failure = any(
961+
p in error_str
962+
for p in ("context", "token", "too large", "too long", "exceed", "payload")
963+
) or ("400" in error_str and history_len > 50)
964+
if is_context_failure:
965+
return (
966+
"⚠️ Session too large for the model's context window.\n"
967+
"Use /compact to compress the conversation, or "
968+
"/reset to start fresh."
969+
)
970+
return (
971+
f"The request failed: {str(error_detail)[:300]}\n"
972+
"Try again or use /reset to start a fresh session."
973+
)
974+
975+
api_calls = int(agent_result.get("api_calls", 0) or 0)
976+
if api_calls > 0 and not agent_result.get("interrupted"):
977+
if agent_result.get("partial"):
978+
err = agent_result.get("error", "processing incomplete")
979+
return f"⚠️ Processing stopped: {str(err)[:200]}. Try again."
980+
return (
981+
"⚠️ Processing completed but no response was generated. "
982+
"This may be a transient error — try sending your message again."
983+
)
984+
985+
return response
986+
987+
942988
class GatewayRunner:
943989
"""
944990
Main gateway controller.
@@ -6439,33 +6485,11 @@ async def _handle_message_with_agent(self, event, source, _quick_key: str, run_g
64396485
session_key, _e,
64406486
)
64416487

6442-
# Surface error details when the agent failed silently (final_response=None)
6443-
if not response and agent_result.get("failed"):
6444-
error_detail = agent_result.get("error", "unknown error")
6445-
error_str = str(error_detail).lower()
6446-
6447-
# Detect context-overflow failures and give specific guidance.
6448-
# Generic 400 "Error" from Anthropic with large sessions is the
6449-
# most common cause of this (#1630).
6450-
_is_ctx_fail = any(p in error_str for p in (
6451-
"context", "token", "too large", "too long",
6452-
"exceed", "payload",
6453-
)) or (
6454-
"400" in error_str
6455-
and len(history) > 50
6456-
)
6457-
6458-
if _is_ctx_fail:
6459-
response = (
6460-
"⚠️ Session too large for the model's context window.\n"
6461-
"Use /compact to compress the conversation, or "
6462-
"/reset to start fresh."
6463-
)
6464-
else:
6465-
response = (
6466-
f"The request failed: {str(error_detail)[:300]}\n"
6467-
"Try again or use /reset to start a fresh session."
6468-
)
6488+
# Normalize empty responses: surface errors, partial failures, and
6489+
# the case where agent did work but returned no text. Fix for #18765.
6490+
response = _normalize_empty_agent_response(
6491+
agent_result, response, history_len=len(history),
6492+
)
64696493

64706494
# If the agent's session_id changed during compression, update
64716495
# session_entry so transcript writes below go to the right session.

hermes_state.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,45 @@ def _do(conn):
718718
self._remove_session_files(sessions_dir, sid)
719719
return len(removed_ids)
720720

721+
def finalize_orphaned_compression_sessions(
    self,
    *,
    min_age_seconds: float = 7 * 24 * 60 * 60,
) -> int:
    """Mark orphaned compression continuation sessions as ended.

    A session is finalized only when ALL of the following hold:

    * it has a parent session whose ``end_reason`` is ``'compression'`` and
      whose ``ended_at`` is set;
    * it has at least one row in ``messages``;
    * its own ``end_reason`` and ``ended_at`` are both NULL;
    * its ``api_call_count`` is 0;
    * it started more than ``min_age_seconds`` ago.

    Non-destructive: no rows are deleted. Matching sessions get
    ``ended_at`` set to the current time and
    ``end_reason='orphaned_compression'``. Fix for #20001.

    Args:
        min_age_seconds: Minimum age (relative to ``started_at``) a session
            must have before it is considered orphaned. Defaults to 7 days,
            so recently started sessions are left alone.

    Returns:
        Number of sessions updated (0 when the write helper returns a
        falsy value).
    """
    cutoff = time.time() - min_age_seconds

    def _do(conn):
        # Timestamp taken when the statement actually runs, so ``ended_at``
        # reflects the moment of finalization.
        now = time.time()
        result = conn.execute(
            """
            UPDATE sessions
               SET ended_at = ?,
                   end_reason = 'orphaned_compression'
             WHERE api_call_count = 0
               AND end_reason IS NULL
               AND ended_at IS NULL
               AND started_at < ?
               AND parent_session_id IS NOT NULL
               AND EXISTS (
                   SELECT 1 FROM sessions p
                    WHERE p.id = sessions.parent_session_id
                      AND p.end_reason = 'compression'
                      AND p.ended_at IS NOT NULL
               )
               AND EXISTS (
                   SELECT 1 FROM messages m
                    WHERE m.session_id = sessions.id
               )
            """,
            (now, cutoff),
        )
        return result.rowcount

    return self._execute_write(_do) or 0
759+
721760
def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
722761
"""Get a session by ID."""
723762
with self._lock:

0 commit comments

Comments
 (0)