Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 9eea464

Browse files
authored
Add OpenTracing for database activity. (#10113)
This adds quite a lot of OpenTracing decoration for database activity. Specifically it adds tracing at four different levels: * emit a span for each "interaction" - ie, the top level database function that we tend to call "transaction", but isn't really, because it can end up as multiple transactions. * emit a span while we hold a database connection open * emit a span for each database transaction - actual actual transaction. * emit a span for each database query. I'm aware this might be quite a lot of overhead, but even just running it on a local Synapse it looks really interesting, and I hope the overhead can be offset just by turning down the sampling frequency and finding other ways of tracing requests of interest (eg, the `force_tracing_for_users` setting).
1 parent 1d14307 commit 9eea464

File tree

3 files changed

+60
-33
lines changed

3 files changed

+60
-33
lines changed

changelog.d/10113.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Report OpenTracing spans for database activity.

synapse/logging/opentracing.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,12 @@ class SynapseTags:
271271
# HTTP request tag (used to distinguish full vs incremental syncs, etc)
272272
REQUEST_TAG = "request_tag"
273273

274+
# Text description of a database transaction
275+
DB_TXN_DESC = "db.txn_desc"
276+
277+
# Uniqueish ID of a database transaction
278+
DB_TXN_ID = "db.txn_id"
279+
274280

275281
# Block everything by default
276282
# A regex which matches the server_names to expose traces for.

synapse/storage/database.py

Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
from synapse.api.errors import StoreError
4242
from synapse.config.database import DatabaseConnectionConfig
43+
from synapse.logging import opentracing
4344
from synapse.logging.context import (
4445
LoggingContext,
4546
current_context,
@@ -313,7 +314,14 @@ def _do_execute(self, func: Callable[..., R], sql: str, *args: Any) -> R:
313314
start = time.time()
314315

315316
try:
316-
return func(sql, *args)
317+
with opentracing.start_active_span(
318+
"db.query",
319+
tags={
320+
opentracing.tags.DATABASE_TYPE: "sql",
321+
opentracing.tags.DATABASE_STATEMENT: sql,
322+
},
323+
):
324+
return func(sql, *args)
317325
except Exception as e:
318326
sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e)
319327
raise
@@ -525,9 +533,16 @@ def new_transaction(
525533
exception_callbacks=exception_callbacks,
526534
)
527535
try:
528-
r = func(cursor, *args, **kwargs)
529-
conn.commit()
530-
return r
536+
with opentracing.start_active_span(
537+
"db.txn",
538+
tags={
539+
opentracing.SynapseTags.DB_TXN_DESC: desc,
540+
opentracing.SynapseTags.DB_TXN_ID: name,
541+
},
542+
):
543+
r = func(cursor, *args, **kwargs)
544+
conn.commit()
545+
return r
531546
except self.engine.module.OperationalError as e:
532547
# This can happen if the database disappears mid
533548
# transaction.
@@ -653,16 +668,17 @@ async def runInteraction(
653668
logger.warning("Starting db txn '%s' from sentinel context", desc)
654669

655670
try:
656-
result = await self.runWithConnection(
657-
self.new_transaction,
658-
desc,
659-
after_callbacks,
660-
exception_callbacks,
661-
func,
662-
*args,
663-
db_autocommit=db_autocommit,
664-
**kwargs,
665-
)
671+
with opentracing.start_active_span(f"db.{desc}"):
672+
result = await self.runWithConnection(
673+
self.new_transaction,
674+
desc,
675+
after_callbacks,
676+
exception_callbacks,
677+
func,
678+
*args,
679+
db_autocommit=db_autocommit,
680+
**kwargs,
681+
)
666682

667683
for after_callback, after_args, after_kwargs in after_callbacks:
668684
after_callback(*after_args, **after_kwargs)
@@ -718,25 +734,29 @@ def inner_func(conn, *args, **kwargs):
718734
with LoggingContext(
719735
str(curr_context), parent_context=parent_context
720736
) as context:
721-
sched_duration_sec = monotonic_time() - start_time
722-
sql_scheduling_timer.observe(sched_duration_sec)
723-
context.add_database_scheduled(sched_duration_sec)
724-
725-
if self.engine.is_connection_closed(conn):
726-
logger.debug("Reconnecting closed database connection")
727-
conn.reconnect()
728-
729-
try:
730-
if db_autocommit:
731-
self.engine.attempt_to_set_autocommit(conn, True)
732-
733-
db_conn = LoggingDatabaseConnection(
734-
conn, self.engine, "runWithConnection"
735-
)
736-
return func(db_conn, *args, **kwargs)
737-
finally:
738-
if db_autocommit:
739-
self.engine.attempt_to_set_autocommit(conn, False)
737+
with opentracing.start_active_span(
738+
operation_name="db.connection",
739+
):
740+
sched_duration_sec = monotonic_time() - start_time
741+
sql_scheduling_timer.observe(sched_duration_sec)
742+
context.add_database_scheduled(sched_duration_sec)
743+
744+
if self.engine.is_connection_closed(conn):
745+
logger.debug("Reconnecting closed database connection")
746+
conn.reconnect()
747+
opentracing.log_kv({"message": "reconnected"})
748+
749+
try:
750+
if db_autocommit:
751+
self.engine.attempt_to_set_autocommit(conn, True)
752+
753+
db_conn = LoggingDatabaseConnection(
754+
conn, self.engine, "runWithConnection"
755+
)
756+
return func(db_conn, *args, **kwargs)
757+
finally:
758+
if db_autocommit:
759+
self.engine.attempt_to_set_autocommit(conn, False)
740760

741761
return await make_deferred_yieldable(
742762
self._db_pool.runWithConnection(inner_func, *args, **kwargs)

0 commit comments

Comments
 (0)