Skip to content

Commit 113b99b

Browse files
angelos and claude committed
feat: idle context fallback for /status using model metadata
When no live agent is running, estimate the context window limit from model metadata (get_model_context_length) and show last_prompt_tokens as current usage. Inspired by upstream PR NousResearch#4678.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d86d96d commit 113b99b

4 files changed

Lines changed: 74 additions & 2 deletions

File tree

cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1950,6 +1950,14 @@ def _build_cli_status_snapshot(self) -> Dict[str, Any]:
19501950
)
19511951
if context_tokens and context_limit:
19521952
context_pct = round((context_tokens / context_limit) * 100)
1953+
else:
1954+
# Idle fallback: estimate context window from model metadata.
1955+
if model:
1956+
try:
1957+
from agent.model_metadata import get_model_context_length
1958+
context_limit = get_model_context_length(str(model or "unknown"))
1959+
except Exception:
1960+
context_limit = None
19531961

19541962
if updated_at is None:
19551963
updated_at = datetime.now() if agent is not None else getattr(self, "session_start", None)

gateway/run.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4051,6 +4051,22 @@ def _build_status_snapshot(self, source: SessionSource, session_entry) -> dict[s
40514051
)
40524052
if context_tokens and context_limit:
40534053
context_pct = round((context_tokens / context_limit) * 100)
4054+
else:
4055+
# Idle fallback: estimate context from last known prompt tokens
4056+
# and model's context window (inspired by PR #4678).
4057+
idle_prompt = safe_status_int(
4058+
getattr(session_entry, "last_prompt_tokens", 0), default=0
4059+
)
4060+
if idle_prompt or model:
4061+
try:
4062+
from agent.model_metadata import get_model_context_length
4063+
context_limit = get_model_context_length(str(model or "unknown"))
4064+
except Exception:
4065+
context_limit = None
4066+
if idle_prompt:
4067+
context_tokens = idle_prompt
4068+
if context_tokens and context_limit:
4069+
context_pct = round((context_tokens / context_limit) * 100)
40544070

40554071
transport = None
40564072
if source.platform == Platform.TELEGRAM:

tests/cli/test_cli_status_command.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ def test_show_session_status_falls_back_to_persisted_session(capsys):
127127
assert "Model: anthropic/claude-opus-4.6 · Provider: anthropic" in output
128128
assert "Usage: 1,000 in · 250 out · 1,820 total · Cost: $1.2500 est." in output
129129
assert "Cache: 500 read · 50 write · 33% hit · 20 reasoning" in output
130-
assert "Context:" not in output
131130
assert "Title: Saved Session" in output
132131
assert "Runtime: Anthropic Messages · Reasoning high · CLI interactive" in output
133132
assert "Queue: depth 0 · State: idle" in output
@@ -141,3 +140,16 @@ def test_show_session_status_uses_pending_title_before_first_persist(capsys):
141140
output = capsys.readouterr().out
142141

143142
assert "Title: Queued Session Title" in output
143+
144+
145+
def test_show_session_status_idle_context_fallback(capsys, monkeypatch):
146+
"""When no live agent, context limit comes from model metadata."""
147+
cli_obj = _make_cli()
148+
import agent.model_metadata as _mm
149+
monkeypatch.setattr(_mm, "get_model_context_length", lambda model, **kw: 200_000)
150+
151+
cli_obj._show_session_status()
152+
output = capsys.readouterr().out
153+
154+
# No prompt tokens in idle CLI, but context limit is resolved
155+
assert "Context:" not in output or "200,000" in output

tests/gateway/test_status_command.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,37 @@ async def test_status_command_shows_live_context_metrics():
226226

227227

228228
@pytest.mark.asyncio
229-
async def test_status_command_omits_context_without_live_agent():
229+
async def test_status_command_shows_idle_context_from_last_prompt_tokens(monkeypatch):
230+
"""When no live agent exists but session has last_prompt_tokens, show
231+
estimated context from model metadata (idle fallback, inspired by PR #4678)."""
232+
session_entry = SessionEntry(
233+
session_key=build_session_key(_make_source()),
234+
session_id="sess-1",
235+
created_at=datetime.now(),
236+
updated_at=datetime.now(),
237+
platform=Platform.TELEGRAM,
238+
chat_type="dm",
239+
last_prompt_tokens=45000,
240+
)
241+
runner = _make_runner(session_entry)
242+
runner._session_db.get_session.return_value = {
243+
"model": "openai/gpt-4o",
244+
"billing_provider": "openai",
245+
"estimated_cost_usd": 0.0,
246+
"cost_status": "estimated",
247+
}
248+
import agent.model_metadata as _mm
249+
monkeypatch.setattr(_mm, "get_model_context_length", lambda model, **kw: 200_000)
250+
251+
result = await runner._handle_message(_make_event("/status"))
252+
253+
assert "**Context:** 45,000 / 200,000 (22%)" in result
254+
assert "**Compactions:**" not in result
255+
256+
257+
@pytest.mark.asyncio
258+
async def test_status_command_omits_context_when_lookup_fails(monkeypatch):
259+
"""When model metadata lookup fails, context section is omitted."""
230260
session_entry = SessionEntry(
231261
session_key=build_session_key(_make_source()),
232262
session_id="sess-1",
@@ -243,6 +273,12 @@ async def test_status_command_omits_context_without_live_agent():
243273
"cost_status": "estimated",
244274
}
245275

276+
def _raise(*args, **kwargs):
277+
raise Exception("not found")
278+
279+
import agent.model_metadata as _mm
280+
monkeypatch.setattr(_mm, "get_model_context_length", _raise)
281+
246282
result = await runner._handle_message(_make_event("/status"))
247283

248284
assert "**Context:**" not in result

0 commit comments

Comments
 (0)