Skip to content

Commit 8ae9cb3

Browse files
committed
Merge remote-tracking branch 'origin/main' into feat/math-validation-agent
# Conflicts: # engine/config/.env.example # engine/src/stirling/api/app.py
2 parents 8aad9a1 + 2bf5f0b commit 8ae9cb3

15 files changed

Lines changed: 397 additions & 112 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,6 @@ docs/type3/signatures/
255255

256256
# Type3 sample PDFs (development only)
257257
**/type3/samples/
258+
259+
# Claude
260+
.claude/

engine/config/.env.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ STIRLING_FAST_MODEL=anthropic:claude-haiku-4-5
88
STIRLING_SMART_MODEL_MAX_TOKENS=8192
99
STIRLING_FAST_MODEL_MAX_TOKENS=2048
1010

11+
# PostHog analytics. Set STIRLING_POSTHOG_ENABLED=true and provide an API key to enable.
12+
STIRLING_POSTHOG_ENABLED=false
13+
# NOTE(review): PostHog phc_ project keys are client-side/public, but the example file should ship a placeholder, not a real project's key.
STIRLING_POSTHOG_API_KEY=phc_your_project_api_key_here
14+
STIRLING_POSTHOG_HOST=https://eu.i.posthog.com
15+
1116
# Log level for the stirling logger hierarchy (DEBUG, INFO, WARNING, ERROR)
1217
STIRLING_LOG_LEVEL=INFO
1318

engine/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ dependencies = [
1010
"pydantic-settings>=2.0.0",
1111
"python-dotenv>=1.2.1",
1212
"uvicorn>=0.35.0",
13+
"opentelemetry-sdk>=1.39.0",
14+
"posthog>=3.0.0",
1315
]
1416

1517
[dependency-groups]

engine/src/stirling/api/app.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
from typing import Annotated
55

66
from fastapi import Depends, FastAPI
7+
from pydantic_ai import Agent
8+
from pydantic_ai.models.instrumented import InstrumentationSettings
79

810
from stirling.agents import ExecutionPlanningAgent, OrchestratorAgent, PdfEditAgent, PdfQuestionAgent, UserSpecAgent
911
from stirling.agents.ledger import MathAuditorAgent
12+
from stirling.api.middleware import UserIdMiddleware
1013
from stirling.api.routes import (
1114
agent_draft_router,
1215
execution_router,
@@ -17,7 +20,7 @@
1720
)
1821
from stirling.config import AppSettings, load_settings
1922
from stirling.contracts import HealthResponse
20-
from stirling.services import build_runtime
23+
from stirling.services import build_runtime, setup_posthog_tracking
2124

2225

2326
def _load_startup_settings(fast_api: FastAPI) -> AppSettings:
@@ -40,10 +43,16 @@ async def lifespan(fast_api: FastAPI):
4043
fast_api.state.user_spec_agent = UserSpecAgent(runtime)
4144
fast_api.state.execution_planning_agent = ExecutionPlanningAgent(runtime)
4245
fast_api.state.math_auditor_agent = MathAuditorAgent(runtime)
46+
tracer_provider = setup_posthog_tracking(settings)
47+
if tracer_provider:
48+
Agent.instrument_all(InstrumentationSettings(tracer_provider=tracer_provider))
4349
yield
50+
if tracer_provider:
51+
tracer_provider.shutdown()
4452

4553

4654
app = FastAPI(title="Stirling AI Engine", lifespan=lifespan, version="0.1.0")
55+
app.add_middleware(UserIdMiddleware)
4756
app.include_router(orchestrator_router)
4857
app.include_router(pdf_edit_router)
4958
app.include_router(pdf_question_router)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from __future__ import annotations
2+
3+
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
4+
from starlette.requests import Request
5+
from starlette.responses import Response
6+
7+
from stirling.services.tracking import current_user_id
8+
9+
_USER_ID_HEADER = "X-User-Id"
10+
11+
12+
class UserIdMiddleware(BaseHTTPMiddleware):
    """Extract X-User-Id header and set it as the current user for PostHog tracking."""

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        header_value = request.headers.get(_USER_ID_HEADER)
        if not header_value:
            # No caller-supplied user ID: leave the contextvar untouched for this request.
            return await call_next(request)
        token = current_user_id.set(header_value)
        try:
            return await call_next(request)
        finally:
            # Restore the previous value so the user ID never leaks into another request.
            current_user_id.reset(token)

engine/src/stirling/config/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def _configure_logging(level_name: str, log_file: str) -> None:
5050
fh.setLevel(level)
5151
root.addHandler(fh)
5252

53+
posthog_enabled: bool = Field(validation_alias="STIRLING_POSTHOG_ENABLED")
54+
posthog_api_key: str = Field(validation_alias="STIRLING_POSTHOG_API_KEY")
55+
posthog_host: str = Field(validation_alias="STIRLING_POSTHOG_HOST")
56+
5357

5458
@lru_cache(maxsize=1)
5559
def load_settings() -> AppSettings:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""Shared services used by the Stirling AI runtime."""
22

33
from .runtime import AppRuntime, build_model_settings, build_runtime
4+
from .tracking import setup_posthog_tracking
45

56
__all__ = [
67
"AppRuntime",
78
"build_model_settings",
89
"build_runtime",
10+
"setup_posthog_tracking",
911
]
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from collections import OrderedDict
5+
from collections.abc import Mapping
6+
from contextvars import ContextVar
7+
from typing import Any
8+
9+
from opentelemetry.context import Context
10+
from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
11+
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( # No public import for these constants yet
12+
GEN_AI_INPUT_MESSAGES,
13+
GEN_AI_OPERATION_NAME,
14+
GEN_AI_OUTPUT_MESSAGES,
15+
GEN_AI_REQUEST_MAX_TOKENS,
16+
GEN_AI_REQUEST_MODEL,
17+
GEN_AI_REQUEST_TEMPERATURE,
18+
GEN_AI_RESPONSE_MODEL,
19+
GEN_AI_SYSTEM,
20+
GEN_AI_TOOL_DEFINITIONS,
21+
GEN_AI_USAGE_INPUT_TOKENS,
22+
GEN_AI_USAGE_OUTPUT_TOKENS,
23+
GenAiOperationNameValues,
24+
)
25+
from opentelemetry.semconv.attributes.server_attributes import SERVER_ADDRESS, SERVER_PORT
26+
from opentelemetry.trace import Span
27+
from posthog.client import Client as PostHogClient
28+
29+
from stirling.config import AppSettings
30+
31+
# Per-request user ID, set by middleware from the X-User-Id header.
32+
# When not set, PostHog generates a random ID and marks the event as personless.
33+
current_user_id: ContextVar[str | None] = ContextVar("current_user_id", default=None)
34+
35+
36+
class LRUSet:
    """Least Recently Used Set: a set with a maximum size that evicts the oldest entries first.

    Backed by an ``OrderedDict`` whose insertion order tracks recency: the front is the
    least recently used entry and is the one evicted when the set overflows.
    """

    def __init__(self, max_size: int) -> None:
        self._max_size = max_size
        self._data: OrderedDict[str, None] = OrderedDict()

    def __contains__(self, key: str) -> bool:
        return key in self._data

    def add(self, key: str) -> None:
        """Insert *key* (or refresh its recency), evicting the oldest entry on overflow."""
        self._data[key] = None
        # Assigning to an existing OrderedDict key keeps its old position, so an
        # explicit move_to_end is required for re-adds to actually refresh recency.
        self._data.move_to_end(key)
        if len(self._data) > self._max_size:
            self._data.popitem(last=False)
50+
51+
52+
def _parse_json_attr(attrs: Mapping[str, Any], key: str) -> Any | None:
53+
"""Parse a JSON string span attribute, returning None on failure."""
54+
raw = attrs.get(key)
55+
if raw is None:
56+
return None
57+
try:
58+
return json.loads(str(raw))
59+
except (json.JSONDecodeError, TypeError):
60+
return None
61+
62+
63+
def _transform_output_choices(choices: list[Any]) -> list[Any]:
64+
"""Transform Pydantic AI's parts-based output format to PostHog-compatible format.
65+
66+
Pydantic AI emits: ``[{"role": "assistant", "parts": [{"type": "tool_call", "name": "..."}]}]``
67+
PostHog expects: ``[{"role": "assistant", "tool_calls": [{"type": "function", "function": {"name": "..."}}]}]``
68+
"""
69+
for choice in choices:
70+
if not isinstance(choice, dict) or "parts" not in choice:
71+
continue
72+
tool_calls = []
73+
for part in choice.get("parts", []):
74+
if isinstance(part, dict) and part.get("type") == "tool_call":
75+
tool_calls.append(
76+
{
77+
"type": "function",
78+
"id": part.get("id", ""),
79+
"function": {"name": part.get("name", "")},
80+
}
81+
)
82+
if tool_calls:
83+
choice["tool_calls"] = tool_calls
84+
choice["content"] = choice.pop("parts")
85+
return choices
86+
87+
88+
def _extract_user_message(attrs: Mapping[str, Any]) -> str:
    """Return the text of the most recent user message in the input-messages attribute, or ""."""
    messages = _parse_json_attr(attrs, GEN_AI_INPUT_MESSAGES)
    if not isinstance(messages, list):
        return ""
    # Walk newest-to-oldest; the first user message containing a text part wins.
    user_messages = (
        message
        for message in reversed(messages)
        if isinstance(message, dict) and message.get("role") == "user"
    )
    for message in user_messages:
        for part in message.get("parts", []):
            if isinstance(part, dict) and part.get("type") == "text":
                return str(part.get("content", ""))
    return ""
101+
102+
103+
# TODO: Replace with an official PostHog integration if one ever exists
104+
class PostHogSpanProcessor(SpanProcessor):
    """Translates Pydantic AI OpenTelemetry spans into PostHog $ai_generation events."""

    def __init__(self, client: PostHogClient) -> None:
        # PostHog client used for all event capture; owned by this processor
        # (shutdown() below shuts the client down too).
        self._client = client
        # Bounded memory of trace IDs for which an $ai_trace event was already
        # emitted — bounded so long-running processes don't grow without limit.
        self._seen_traces = LRUSet(max_size=10_000)

    def on_start(self, span: Span, parent_context: Context | None = None) -> None:
        """No-op: all data of interest is only available once the span has ended."""
        pass

    def on_end(self, span: ReadableSpan) -> None:
        """Capture a PostHog $ai_generation event for every finished chat span.

        Non-chat spans (tool runs, agent wrappers, etc.) are ignored.
        """
        attrs = dict(span.attributes or {})
        if attrs.get(GEN_AI_OPERATION_NAME) != GenAiOperationNameValues.CHAT.value:
            return

        # Emit the per-trace event (if this is the first span of its trace)
        # before the generation event itself.
        properties = self._build_generation_properties(span, attrs)
        self._maybe_emit_trace_event(span, attrs, properties)
        self._client.capture(
            # NOTE(review): relies on the request's contextvar still being active
            # when the span ends — confirm spans always end within the request
            # context; otherwise events are captured as personless.
            distinct_id=current_user_id.get(),
            event="$ai_generation",
            properties=properties,
        )

    def _build_generation_properties(self, span: ReadableSpan, attrs: Mapping[str, Any]) -> dict[str, object]:
        """Build the $ai_generation event properties from span data."""
        properties: dict[str, object] = {
            "$ai_provider": attrs.get(GEN_AI_SYSTEM, ""),
            # Prefer the model the provider reports over the one that was requested.
            "$ai_model": attrs.get(GEN_AI_RESPONSE_MODEL) or attrs.get(GEN_AI_REQUEST_MODEL, ""),
            "$ai_input_tokens": attrs.get(GEN_AI_USAGE_INPUT_TOKENS, 0),
            "$ai_output_tokens": attrs.get(GEN_AI_USAGE_OUTPUT_TOKENS, 0),
        }

        # Hex-format trace/span IDs the way OTel conventionally renders them
        # (128-bit trace, 64-bit span).
        if span.context:
            properties["$ai_trace_id"] = format(span.context.trace_id, "032x")
            properties["$ai_span_id"] = format(span.context.span_id, "016x")
        if span.parent and span.parent.span_id:
            properties["$ai_parent_id"] = format(span.parent.span_id, "016x")
        if span.start_time and span.end_time:
            # OTel timestamps are integer nanoseconds; PostHog expects seconds.
            properties["$ai_latency"] = (span.end_time - span.start_time) / 1e9

        self._add_message_properties(properties, attrs)
        self._add_model_parameters(properties, attrs)
        self._add_tool_definitions(properties, attrs)
        self._add_base_url(properties, attrs)

        return properties

    def _maybe_emit_trace_event(
        self, span: ReadableSpan, attrs: Mapping[str, Any], properties: dict[str, object]
    ) -> None:
        """Emit an $ai_trace event for the first span seen per trace ID."""
        trace_id = str(properties.get("$ai_trace_id", ""))
        if not trace_id or trace_id in self._seen_traces:
            return

        self._seen_traces.add(trace_id)
        trace_properties: dict[str, object] = {
            "$ai_trace_id": trace_id,
            # The trace is named after the last user message of its first chat span.
            "$ai_trace_name": _extract_user_message(attrs),
            "$ai_provider": attrs.get(GEN_AI_SYSTEM, ""),
        }
        # NOTE(review): this is the latency of the first chat span only, not of
        # the whole trace — later spans of the same trace never update it.
        if span.start_time and span.end_time:
            trace_properties["$ai_latency"] = (span.end_time - span.start_time) / 1e9
        self._client.capture(
            distinct_id=current_user_id.get(),
            event="$ai_trace",
            properties=trace_properties,
        )

    @staticmethod
    def _add_message_properties(properties: dict[str, object], attrs: Mapping[str, Any]) -> None:
        """Attach input messages and output choices parsed from the span's JSON attributes."""
        input_messages = _parse_json_attr(attrs, GEN_AI_INPUT_MESSAGES)
        if input_messages is not None:
            properties["$ai_input"] = input_messages

        output_messages = _parse_json_attr(attrs, GEN_AI_OUTPUT_MESSAGES)
        if isinstance(output_messages, list):
            # Lists get the parts -> tool_calls/content reshaping PostHog expects.
            properties["$ai_output_choices"] = _transform_output_choices(output_messages)
        elif output_messages is not None:
            properties["$ai_output_choices"] = output_messages

    @staticmethod
    def _add_model_parameters(properties: dict[str, object], attrs: Mapping[str, Any]) -> None:
        """Attach request-time model parameters (temperature, max tokens) when present."""
        model_parameters: dict[str, object] = {}
        if GEN_AI_REQUEST_TEMPERATURE in attrs:
            model_parameters["temperature"] = attrs[GEN_AI_REQUEST_TEMPERATURE]
        if GEN_AI_REQUEST_MAX_TOKENS in attrs:
            model_parameters["max_tokens"] = attrs[GEN_AI_REQUEST_MAX_TOKENS]
        if model_parameters:
            properties["$ai_model_parameters"] = model_parameters

    @staticmethod
    def _add_tool_definitions(properties: dict[str, object], attrs: Mapping[str, Any]) -> None:
        """Attach the tool definitions offered to the model, when the span recorded them."""
        tools = _parse_json_attr(attrs, GEN_AI_TOOL_DEFINITIONS)
        if tools is not None:
            properties["$ai_tools"] = tools

    @staticmethod
    def _add_base_url(properties: dict[str, object], attrs: Mapping[str, Any]) -> None:
        """Attach "host" or "host:port" as the base URL, from the span's server attributes."""
        parts: list[str] = []
        if host := attrs.get(SERVER_ADDRESS):
            parts.append(str(host))
        if port := attrs.get(SERVER_PORT):
            parts.append(str(port))
        if parts:
            properties["$ai_base_url"] = ":".join(parts)

    def shutdown(self) -> None:
        """Shut down the underlying PostHog client (flushes pending events)."""
        self._client.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Flush buffered events. timeout_millis is accepted for interface
        compatibility but not honored — the client flush has no timeout hook."""
        self._client.flush()
        return True
217+
218+
219+
def setup_posthog_tracking(settings: AppSettings) -> TracerProvider | None:
    """Configure OpenTelemetry with a PostHog span processor for LLM analytics.

    Returns the TracerProvider so it can be shut down on app exit,
    or None when tracking is disabled.
    """
    # Tracking requires both the feature flag and a non-empty API key.
    if not (settings.posthog_enabled and settings.posthog_api_key):
        return None

    provider = TracerProvider()
    client = PostHogClient(project_api_key=settings.posthog_api_key, host=settings.posthog_host)
    provider.add_span_processor(PostHogSpanProcessor(client))
    return provider

engine/tests/conftest.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Iterator
4+
5+
import pytest
6+
7+
from stirling.config import AppSettings, load_settings
8+
from stirling.services import build_runtime
9+
from stirling.services.runtime import AppRuntime
10+
11+
12+
@pytest.fixture(autouse=True)
def clear_settings_cache() -> Iterator[None]:
    """Drop the lru_cache on load_settings before and after every test for isolation."""

    def _reset() -> None:
        load_settings.cache_clear()

    _reset()
    yield
    _reset()
17+
18+
19+
def build_app_settings() -> AppSettings:
    """Construct AppSettings with deterministic test values (PostHog disabled)."""
    test_values: dict[str, object] = {
        "smart_model_name": "test",
        "fast_model_name": "test",
        "smart_model_max_tokens": 8192,
        "fast_model_max_tokens": 2048,
        "posthog_enabled": False,
        "posthog_api_key": "",
        "posthog_host": "https://eu.i.posthog.com",
    }
    return AppSettings(**test_values)
29+
30+
31+
@pytest.fixture
def app_settings() -> AppSettings:
    """Fresh test AppSettings for each test that requests it."""
    settings = build_app_settings()
    return settings
34+
35+
36+
@pytest.fixture
def runtime(app_settings: AppSettings) -> AppRuntime:
    """An AppRuntime wired from the test AppSettings fixture."""
    built = build_runtime(app_settings)
    return built

0 commit comments

Comments
 (0)