Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,18 @@ def _run_state_db_auto_maintenance(session_db) -> None:
except Exception as _prune_exc:
logger.debug("Ghost session prune skipped: %s", _prune_exc)

# One-time finalize of orphaned compression continuations (#20001).
try:
if not session_db.get_meta("orphaned_compression_finalize_v1"):
finalized = session_db.finalize_orphaned_compression_sessions()
session_db.set_meta("orphaned_compression_finalize_v1", "1")
if finalized:
logger.info(
"Finalized %d orphaned compression sessions", finalized
)
except Exception as _finalize_exc:
logger.debug("Orphan compression finalize skipped: %s", _finalize_exc)

cfg = (_load_full_config().get("sessions") or {})
if not cfg.get("auto_prune", False):
return
Expand Down
78 changes: 51 additions & 27 deletions gateway/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,52 @@ def _format_gateway_process_notification(evt: dict) -> "str | None":
# Module-level holder that, going by its name and annotation, is meant to carry
# a weak reference to the gateway runner. The default is a plain callable that
# returns None, so calling it before anything is registered gives the same
# result as calling a weakref.ref whose referent is gone.
# NOTE(review): presumably rebound to a real weakref elsewhere — confirm.
_gateway_runner_ref: _weakref.ref = lambda: None


def _normalize_empty_agent_response(
agent_result: dict,
response: str,
*,
history_len: int = 0,
) -> str:
"""Normalize empty/None agent responses into user-facing messages.

Consolidates the existing ``failed`` handler and adds a catch-all for
the case where the agent did work (api_calls > 0) but returned no text.
Fix for #18765.
"""
if response:
return response

if agent_result.get("failed"):
error_detail = agent_result.get("error", "unknown error")
error_str = str(error_detail).lower()
is_context_failure = any(
p in error_str
for p in ("context", "token", "too large", "too long", "exceed", "payload")
) or ("400" in error_str and history_len > 50)
if is_context_failure:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
return (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
)

api_calls = int(agent_result.get("api_calls", 0) or 0)
if api_calls > 0 and not agent_result.get("interrupted"):
if agent_result.get("partial"):
err = agent_result.get("error", "processing incomplete")
return f"⚠️ Processing stopped: {str(err)[:200]}. Try again."
return (
"⚠️ Processing completed but no response was generated. "
"This may be a transient error — try sending your message again."
)

return response


class GatewayRunner:
"""
Main gateway controller.
Expand Down Expand Up @@ -6439,33 +6485,11 @@ async def _handle_message_with_agent(self, event, source, _quick_key: str, run_g
session_key, _e,
)

# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result.get("failed"):
error_detail = agent_result.get("error", "unknown error")
error_str = str(error_detail).lower()

# Detect context-overflow failures and give specific guidance.
# Generic 400 "Error" from Anthropic with large sessions is the
# most common cause of this (#1630).
_is_ctx_fail = any(p in error_str for p in (
"context", "token", "too large", "too long",
"exceed", "payload",
)) or (
"400" in error_str
and len(history) > 50
)

if _is_ctx_fail:
response = (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
else:
response = (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
)
# Normalize empty responses: surface errors, partial failures, and
# the case where agent did work but returned no text. Fix for #18765.
response = _normalize_empty_agent_response(
agent_result, response, history_len=len(history),
)

# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
Expand Down
39 changes: 39 additions & 0 deletions hermes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,45 @@ def _do(conn):
self._remove_session_files(sessions_dir, sid)
return len(removed_ids)

def finalize_orphaned_compression_sessions(
    self, *, min_age_seconds: float = 604800.0
) -> int:
    """Mark orphaned compression continuation sessions as ended.

    Targets child sessions that were never finalized: parent is ended
    with reason='compression', child has messages but no end_reason/ended_at
    and api_call_count=0, and the child started more than
    ``min_age_seconds`` ago. Non-destructive: preserves all messages and
    sets end_reason='orphaned_compression' (plus ended_at = current time).
    Fix for #20001.

    Args:
        min_age_seconds: Minimum age of a child session, measured from its
            ``started_at``, before it may be finalized. Defaults to
            7 days (604800 seconds).

    Returns:
        Number of session rows updated; 0 when the write helper returns a
        falsy value.
    """
    # Children that started after this timestamp are left untouched.
    cutoff = time.time() - min_age_seconds

    def _do(conn):
        # Stamp taken inside the callback so ended_at reflects when the
        # UPDATE actually runs.
        now = time.time()
        result = conn.execute(
            """
            UPDATE sessions
            SET ended_at = ?,
                end_reason = 'orphaned_compression'
            WHERE api_call_count = 0
              AND end_reason IS NULL
              AND ended_at IS NULL
              AND started_at < ?
              AND parent_session_id IS NOT NULL
              AND EXISTS (
                  SELECT 1 FROM sessions p
                  WHERE p.id = sessions.parent_session_id
                    AND p.end_reason = 'compression'
                    AND p.ended_at IS NOT NULL
              )
              AND EXISTS (
                  SELECT 1 FROM messages m
                  WHERE m.session_id = sessions.id
              )
            """,
            (now, cutoff),
        )
        return result.rowcount

    # NOTE(review): _execute_write presumably runs the callback against the
    # write connection and returns its result — confirm against its definition.
    return self._execute_write(_do) or 0

def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
"""Get a session by ID."""
with self._lock:
Expand Down
Loading
Loading