feat: idle context fallback for /status using model metadata

angelos · claude · angelos · commit 113b99be11b5 · 2026-04-12T12:12:36.000Z
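When no live agent is running, /status now estimates the context window limit from model metadata (get_model_context_length). The gateway additionally shows the session's last_prompt_tokens as current usage and derives the usage percentage from it; the CLI fallback resolves only the limit. If the metadata lookup raises, the limit is left unset and the gateway omits the Context line. Inspired by upstream PR NousResearch#4678. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>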
diff --git a/cli.py b/cli.py
@@ -1950,6 +1950,14 @@ def _build_cli_status_snapshot(self) -> Dict[str, Any]:
             )
             if context_tokens and context_limit:
                 context_pct = round((context_tokens / context_limit) * 100)
+        else:
+            # Idle fallback: estimate context window from model metadata.
+            if model:
+                try:
+                    from agent.model_metadata import get_model_context_length
+                    context_limit = get_model_context_length(str(model or "unknown"))
+                except Exception:
+                    context_limit = None
 
         if updated_at is None:
             updated_at = datetime.now() if agent is not None else getattr(self, "session_start", None)
diff --git a/gateway/run.py b/gateway/run.py
@@ -4051,6 +4051,22 @@ def _build_status_snapshot(self, source: SessionSource, session_entry) -> dict[s
             )
             if context_tokens and context_limit:
                 context_pct = round((context_tokens / context_limit) * 100)
+        else:
+            # Idle fallback: estimate context from last known prompt tokens
+            # and model's context window (inspired by PR #4678).
+            idle_prompt = safe_status_int(
+                getattr(session_entry, "last_prompt_tokens", 0), default=0
+            )
+            if idle_prompt or model:
+                try:
+                    from agent.model_metadata import get_model_context_length
+                    context_limit = get_model_context_length(str(model or "unknown"))
+                except Exception:
+                    context_limit = None
+                if idle_prompt:
+                    context_tokens = idle_prompt
+                if context_tokens and context_limit:
+                    context_pct = round((context_tokens / context_limit) * 100)
 
         transport = None
         if source.platform == Platform.TELEGRAM:
diff --git a/tests/cli/test_cli_status_command.py b/tests/cli/test_cli_status_command.py
@@ -127,7 +127,6 @@ def test_show_session_status_falls_back_to_persisted_session(capsys):
     assert "Model: anthropic/claude-opus-4.6 · Provider: anthropic" in output
     assert "Usage: 1,000 in · 250 out · 1,820 total · Cost: $1.2500 est." in output
     assert "Cache: 500 read · 50 write · 33% hit · 20 reasoning" in output
-    assert "Context:" not in output
     assert "Title: Saved Session" in output
     assert "Runtime: Anthropic Messages · Reasoning high · CLI interactive" in output
     assert "Queue: depth 0 · State: idle" in output
@@ -141,3 +140,16 @@ def test_show_session_status_uses_pending_title_before_first_persist(capsys):
     output = capsys.readouterr().out
 
     assert "Title: Queued Session Title" in output
+
+
+def test_show_session_status_idle_context_fallback(capsys, monkeypatch):
+    """When no live agent, context limit comes from model metadata."""
+    cli_obj = _make_cli()
+    import agent.model_metadata as _mm
+    monkeypatch.setattr(_mm, "get_model_context_length", lambda model, **kw: 200_000)
+
+    cli_obj._show_session_status()
+    output = capsys.readouterr().out
+
+    # Idle CLI has no prompt tokens: Context is either omitted or shows the resolved 200,000 limit.
+    assert "Context:" not in output or "200,000" in output
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
@@ -226,7 +226,37 @@ async def test_status_command_shows_live_context_metrics():
 
 
 @pytest.mark.asyncio
-async def test_status_command_omits_context_without_live_agent():
+async def test_status_command_shows_idle_context_from_last_prompt_tokens(monkeypatch):
+    """When no live agent exists but session has last_prompt_tokens, show
+    estimated context from model metadata (idle fallback, inspired by PR #4678)."""
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        last_prompt_tokens=45000,
+    )
+    runner = _make_runner(session_entry)
+    runner._session_db.get_session.return_value = {
+        "model": "openai/gpt-4o",
+        "billing_provider": "openai",
+        "estimated_cost_usd": 0.0,
+        "cost_status": "estimated",
+    }
+    import agent.model_metadata as _mm
+    monkeypatch.setattr(_mm, "get_model_context_length", lambda model, **kw: 200_000)
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert "**Context:** 45,000 / 200,000 (22%)" in result
+    assert "**Compactions:**" not in result
+
+
+@pytest.mark.asyncio
+async def test_status_command_omits_context_when_lookup_fails(monkeypatch):
+    """When model metadata lookup fails, context section is omitted."""
     session_entry = SessionEntry(
         session_key=build_session_key(_make_source()),
         session_id="sess-1",
@@ -243,6 +273,12 @@ async def test_status_command_omits_context_without_live_agent():
         "cost_status": "estimated",
     }
 
+    def _raise(*args, **kwargs):
+        raise Exception("not found")
+
+    import agent.model_metadata as _mm
+    monkeypatch.setattr(_mm, "get_model_context_length", _raise)
+
     result = await runner._handle_message(_make_event("/status"))
 
     assert "**Context:**" not in result