|
25 | 25 |
|
26 | 26 | from agent.memory_manager import sanitize_context |
27 | 27 | from hermes_constants import get_hermes_home |
28 | | -from typing import Any, Callable, Dict, List, Optional, TypeVar |
| 28 | +from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar |
29 | 29 |
|
30 | 30 | logger = logging.getLogger(__name__) |
31 | 31 |
|
32 | 32 | T = TypeVar("T") |
33 | 33 |
|
34 | 34 | DEFAULT_DB_PATH = get_hermes_home() / "state.db" |
35 | 35 |
|
36 | | -SCHEMA_VERSION = 11 |
| 36 | +SCHEMA_VERSION = 12 |
37 | 37 |
|
38 | 38 | SCHEMA_SQL = """ |
39 | 39 | CREATE TABLE IF NOT EXISTS schema_version ( |
|
98 | 98 | CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id); |
99 | 99 | CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); |
100 | 100 | CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestamp); |
| 101 | +
|
| 102 | +CREATE TABLE IF NOT EXISTS skill_invocations ( |
| 103 | + id INTEGER PRIMARY KEY AUTOINCREMENT, |
| 104 | + session_id TEXT REFERENCES sessions(id), |
| 105 | + cron_id TEXT, |
| 106 | + skill_name TEXT NOT NULL, |
| 107 | + skill_version TEXT, |
| 108 | + invoked_at REAL NOT NULL, |
| 109 | + completed_at REAL, |
| 110 | + model TEXT, |
| 111 | + provider TEXT, |
| 112 | + duration_seconds REAL, |
| 113 | + input_tokens INTEGER DEFAULT 0, |
| 114 | + output_tokens INTEGER DEFAULT 0, |
| 115 | + cache_read_tokens INTEGER DEFAULT 0, |
| 116 | + cache_write_tokens INTEGER DEFAULT 0, |
| 117 | + estimated_cost_usd REAL, |
| 118 | + success INTEGER, |
| 119 | + end_reason TEXT, |
| 120 | + quality_score REAL |
| 121 | +); |
| 122 | +
|
| 123 | +CREATE INDEX IF NOT EXISTS idx_skill_invocations_skill ON skill_invocations(skill_name, invoked_at DESC); |
| 124 | +CREATE INDEX IF NOT EXISTS idx_skill_invocations_cron ON skill_invocations(cron_id, invoked_at DESC); |
| 125 | +CREATE INDEX IF NOT EXISTS idx_skill_invocations_session ON skill_invocations(session_id); |
| 126 | +
|
| 127 | +CREATE VIEW IF NOT EXISTS skill_stats_daily AS |
| 128 | +SELECT |
| 129 | + skill_name, |
| 130 | + model, |
| 131 | + provider, |
| 132 | + DATE(invoked_at, 'unixepoch') AS day, |
| 133 | + COUNT(*) AS invocation_count, |
| 134 | + SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) AS success_count, |
| 135 | + SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) AS failure_count, |
| 136 | + AVG(duration_seconds) AS avg_duration_s, |
| 137 | + SUM(estimated_cost_usd) AS total_cost_usd, |
| 138 | + AVG(estimated_cost_usd) AS avg_cost_usd, |
| 139 | + SUM(input_tokens) AS total_input_tokens, |
| 140 | + SUM(output_tokens) AS total_output_tokens, |
| 141 | + AVG(quality_score) AS avg_quality_score, |
| 142 | + MAX(invoked_at) AS last_invoked_at |
| 143 | +FROM skill_invocations |
| 144 | +GROUP BY skill_name, model, provider, day; |
101 | 145 | """ |
102 | 146 |
|
103 | 147 | FTS_SQL = """ |
@@ -677,6 +721,160 @@ def _do(conn): |
677 | 721 | conn.execute(sql, params) |
678 | 722 | self._execute_write(_do) |
679 | 723 |
|
| 724 | + def record_skill_invocation( |
| 725 | + self, |
| 726 | + *, |
| 727 | + skill_name: str, |
| 728 | + invoked_at: float, |
| 729 | + session_id: Optional[str] = None, |
| 730 | + cron_id: Optional[str] = None, |
| 731 | + skill_version: Optional[str] = None, |
| 732 | + completed_at: Optional[float] = None, |
| 733 | + model: Optional[str] = None, |
| 734 | + provider: Optional[str] = None, |
| 735 | + duration_seconds: Optional[float] = None, |
| 736 | + input_tokens: int = 0, |
| 737 | + output_tokens: int = 0, |
| 738 | + cache_read_tokens: int = 0, |
| 739 | + cache_write_tokens: int = 0, |
| 740 | + estimated_cost_usd: Optional[float] = None, |
| 741 | + success: Optional[bool] = None, |
| 742 | + end_reason: Optional[str] = None, |
| 743 | + quality_score: Optional[float] = None, |
| 744 | + ) -> int: |
| 745 | + """Insert one ``skill_invocations`` row and return the new id. |
| 746 | +
|
| 747 | + One row per (skill_name, cron run) — see ``cron/scheduler.py`` for the |
| 748 | + cron-side caller. Slash-command and ad-hoc skill_view invocations are |
| 749 | + not tracked here in v1. |
| 750 | +
|
| 751 | + ``success`` is stored as 0/1 to match the SQLite REAL-vs-INTEGER |
| 752 | + convention used elsewhere in the schema; ``None`` is left as NULL. |
| 753 | + """ |
| 754 | + sql = """INSERT INTO skill_invocations ( |
| 755 | + session_id, cron_id, skill_name, skill_version, |
| 756 | + invoked_at, completed_at, model, provider, |
| 757 | + duration_seconds, |
| 758 | + input_tokens, output_tokens, cache_read_tokens, cache_write_tokens, |
| 759 | + estimated_cost_usd, success, end_reason, quality_score |
| 760 | + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" |
| 761 | + success_int: Optional[int] |
| 762 | + if success is None: |
| 763 | + success_int = None |
| 764 | + else: |
| 765 | + success_int = 1 if success else 0 |
| 766 | + params = ( |
| 767 | + session_id, cron_id, skill_name, skill_version, |
| 768 | + invoked_at, completed_at, model, provider, |
| 769 | + duration_seconds, |
| 770 | + input_tokens, output_tokens, cache_read_tokens, cache_write_tokens, |
| 771 | + estimated_cost_usd, success_int, end_reason, quality_score, |
| 772 | + ) |
| 773 | + |
| 774 | + new_id_holder: Dict[str, int] = {} |
| 775 | + |
| 776 | + def _do(conn): |
| 777 | + cur = conn.execute(sql, params) |
| 778 | + new_id_holder["id"] = int(cur.lastrowid or 0) |
| 779 | + |
| 780 | + self._execute_write(_do) |
| 781 | + return new_id_holder.get("id", 0) |
| 782 | + |
| 783 | + def query_skill_ema( |
| 784 | + self, |
| 785 | + window_days: int = 14, |
| 786 | + alpha: float = 0.3, |
| 787 | + ) -> List[Dict[str, Any]]: |
| 788 | + """Per-(skill_name, model) exponentially-weighted moving averages. |
| 789 | +
|
| 790 | + Reads ``skill_stats_daily`` for the last *window_days* and applies |
| 791 | + exponential weighting where the most recent day has weight ``alpha``, |
| 792 | + the next has ``alpha * (1-alpha)``, and so on. The default |
| 793 | + ``alpha=0.3`` gives roughly a 5-day half-life — recent behaviour |
| 794 | + dominates without dropping older data on a hard window. |
| 795 | +
|
| 796 | + Returns a list of dicts (one per (skill_name, model) bucket) with |
| 797 | + keys:: |
| 798 | +
|
| 799 | + skill_name, model, provider, |
| 800 | + sample_count, success_count, failure_count, |
| 801 | + ema_success_rate, ema_duration_s, ema_cost_per_call, |
| 802 | + days_with_data, last_invoked_at |
| 803 | +
|
| 804 | + Sorted by ``last_invoked_at`` descending so the dashboard surfaces |
| 805 | + currently-active skills first. Buckets with zero rows in the window |
| 806 | + are silently omitted. |
| 807 | + """ |
| 808 | + if window_days <= 0 or alpha <= 0 or alpha >= 1: |
| 809 | + return [] |
| 810 | + cutoff_ts = time.time() - window_days * 86400.0 |
| 811 | + sql = ( |
| 812 | + "SELECT skill_name, model, provider, day, " |
| 813 | + "invocation_count, success_count, failure_count, " |
| 814 | + "avg_duration_s, avg_cost_usd, avg_quality_score, last_invoked_at " |
| 815 | + "FROM skill_stats_daily " |
| 816 | + "WHERE last_invoked_at >= ? " |
| 817 | + "ORDER BY skill_name, model, day" |
| 818 | + ) |
| 819 | + with self._lock: |
| 820 | + cur = self._conn.execute(sql, (cutoff_ts,)) |
| 821 | + rows = [dict(r) for r in cur.fetchall()] |
| 822 | + |
| 823 | + groups: Dict[Tuple[str, Optional[str]], List[Dict[str, Any]]] = {} |
| 824 | + for r in rows: |
| 825 | + key = (r["skill_name"], r["model"]) |
| 826 | + groups.setdefault(key, []).append(r) |
| 827 | + |
| 828 | + out: List[Dict[str, Any]] = [] |
| 829 | + for (skill_name, model), rs in groups.items(): |
| 830 | + rs.sort(key=lambda r: r["day"]) |
| 831 | + n = len(rs) |
| 832 | + raw_weights = [alpha * (1 - alpha) ** (n - 1 - i) for i in range(n)] |
| 833 | + wsum = sum(raw_weights) |
| 834 | + weights = ( |
| 835 | + [w / wsum for w in raw_weights] |
| 836 | + if wsum > 0 |
| 837 | + else [1.0 / n] * n |
| 838 | + ) |
| 839 | + |
| 840 | + sample_count = sum(int(r["invocation_count"] or 0) for r in rs) |
| 841 | + success_count = sum(int(r["success_count"] or 0) for r in rs) |
| 842 | + failure_count = sum(int(r["failure_count"] or 0) for r in rs) |
| 843 | + |
| 844 | + ema_success_rate = sum( |
| 845 | + w * ( |
| 846 | + (int(r["success_count"] or 0) / max(1, int(r["invocation_count"] or 0))) |
| 847 | + ) |
| 848 | + for w, r in zip(weights, rs) |
| 849 | + ) |
| 850 | + ema_duration = sum( |
| 851 | + w * float(r["avg_duration_s"] or 0.0) |
| 852 | + for w, r in zip(weights, rs) |
| 853 | + ) |
| 854 | + ema_cost = sum( |
| 855 | + w * float(r["avg_cost_usd"] or 0.0) |
| 856 | + for w, r in zip(weights, rs) |
| 857 | + ) |
| 858 | + last_invoked = max(float(r["last_invoked_at"] or 0.0) for r in rs) |
| 859 | + provider_latest = rs[-1].get("provider") |
| 860 | + |
| 861 | + out.append({ |
| 862 | + "skill_name": skill_name, |
| 863 | + "model": model, |
| 864 | + "provider": provider_latest, |
| 865 | + "sample_count": sample_count, |
| 866 | + "success_count": success_count, |
| 867 | + "failure_count": failure_count, |
| 868 | + "ema_success_rate": ema_success_rate, |
| 869 | + "ema_duration_s": ema_duration, |
| 870 | + "ema_cost_per_call": ema_cost, |
| 871 | + "days_with_data": n, |
| 872 | + "last_invoked_at": last_invoked, |
| 873 | + }) |
| 874 | + |
| 875 | + out.sort(key=lambda x: x["last_invoked_at"], reverse=True) |
| 876 | + return out |
| 877 | + |
680 | 878 | def ensure_session( |
681 | 879 | self, |
682 | 880 | session_id: str, |
|
0 commit comments