
Commit 4bfd4ae

Make Orchestrator metrics singleton
So that even if applications instantiate multiple Orchestrators, "megaservice_*" metrics collect data from all of them.

Another option would be to add arguments for passing Orchestrator instance names as metric prefixes, to name and differentiate metrics for each Orchestrator instance. However, that would have needed changes in 3 OPEA projects instead of just this one, and dashboards would then need to hard-code those per-application prefixes.

Signed-off-by: Eero Tamminen <[email protected]>
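As a rough illustration of the problem and the fix, here is a simplified sketch (not the patched code, which creates its metrics lazily under a lock), assuming the standard prometheus_client package that provides the Gauge and Histogram classes used in the diff below:

from prometheus_client import Gauge

class OrchestratorMetrics:
    def __init__(self) -> None:
        # prometheus_client registers metric names in a shared registry, so
        # constructing "megaservice_request_pending" a second time raises a
        # duplicated-timeseries ValueError; that is one reason each
        # Orchestrator cannot simply create its own "megaservice_*" metrics.
        self.request_pending = Gauge(
            "megaservice_request_pending", "Count of currently pending requests (gauge)"
        )

# Single module-level instance, shared by every orchestrator.
_metrics = OrchestratorMetrics()

class ServiceOrchestrator:
    def __init__(self) -> None:
        # All instances point at the same metrics object, so the
        # "megaservice_*" series aggregate data from all of them.
        self.metrics = _metrics

# Creating several orchestrators is now safe; they update shared metrics.
a, b = ServiceOrchestrator(), ServiceOrchestrator()
assert a.metrics is b.metrics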
1 parent a43d1de commit 4bfd4ae

comps/cores/mega/orchestrator.py

Lines changed: 21 additions & 17 deletions
@@ -29,19 +29,7 @@
 
 
 class OrchestratorMetrics:
-    # Need an static class-level ID for metric prefix because:
-    # - Prometheus requires metrics (their names) to be unique
-    _instance_id = 0
-
     def __init__(self) -> None:
-        OrchestratorMetrics._instance_id += 1
-        if OrchestratorMetrics._instance_id > 1:
-            self._prefix = f"megaservice{self._instance_id}"
-        else:
-            self._prefix = "megaservice"
-
-        self.request_pending = Gauge(f"{self._prefix}_request_pending", "Count of currently pending requests (gauge)")
-
         # locking for latency metric creation / method change
         self._lock = threading.Lock()
 
@@ -50,20 +38,22 @@ def __init__(self) -> None:
         self.first_token_latency = None
         self.inter_token_latency = None
         self.request_latency = None
+        self.request_pending = None
 
         # initial methods to create the metrics
         self.token_update = self._token_update_create
         self.request_update = self._request_update_create
+        self.pending_update = self._pending_update_create
 
     def _token_update_create(self, token_start: float, is_first: bool) -> float:
         with self._lock:
             # in case another thread already got here
             if self.token_update == self._token_update_create:
                 self.first_token_latency = Histogram(
-                    f"{self._prefix}_first_token_latency", "First token latency (histogram)"
+                    "megaservice_first_token_latency", "First token latency (histogram)"
                 )
                 self.inter_token_latency = Histogram(
-                    f"{self._prefix}_inter_token_latency", "Inter-token latency (histogram)"
+                    "megaservice_inter_token_latency", "Inter-token latency (histogram)"
                 )
                 self.token_update = self._token_update_real
         return self.token_update(token_start, is_first)
@@ -73,11 +63,21 @@ def _request_update_create(self, req_start: float) -> None:
             # in case another thread already got here
             if self.request_update == self._request_update_create:
                 self.request_latency = Histogram(
-                    f"{self._prefix}_request_latency", "Whole LLM request/reply latency (histogram)"
+                    "megaservice_request_latency", "Whole LLM request/reply latency (histogram)"
                 )
                 self.request_update = self._request_update_real
         self.request_update(req_start)
 
+    def _pending_update_create(self, increase: bool) -> None:
+        with self._lock:
+            # in case another thread already got here
+            if self.pending_update == self._pending_update_create:
+                self.request_pending = Gauge(
+                    "megaservice_request_pending", "Count of currently pending requests (gauge)"
+                )
+                self.pending_update = self._pending_update_real
+        self.pending_update(increase)
+
     def _token_update_real(self, token_start: float, is_first: bool) -> float:
         now = time.time()
         if is_first:
@@ -89,18 +89,22 @@ def _token_update_real(self, token_start: float, is_first: bool) -> float:
     def _request_update_real(self, req_start: float) -> None:
         self.request_latency.observe(time.time() - req_start)
 
-    def pending_update(self, increase: bool) -> None:
+    def _pending_update_real(self, increase: bool) -> None:
         if increase:
             self.request_pending.inc()
         else:
             self.request_pending.dec()
 
 
+# Prometheus metrics need to be singletons, not per Orchestrator
+_metrics = OrchestratorMetrics()
+
+
 class ServiceOrchestrator(DAG):
     """Manage 1 or N micro services in a DAG through Python API."""
 
     def __init__(self) -> None:
-        self.metrics = OrchestratorMetrics()
+        self.metrics = _metrics
         self.services = {} # all services, id -> service
         super().__init__()
