From cf52acda722607f5b2979b26abd3d26f2fb33279 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Aug 2022 18:13:16 -0500 Subject: [PATCH 1/4] Instrument FederationStateIdsServlet Conflicts: docker/conf/homeserver.yaml synapse/federation/federation_server.py synapse/handlers/federation.py --- synapse/federation/federation_server.py | 11 ++++++++++- synapse/handlers/federation.py | 4 +++- synapse/rest/client/room.py | 2 ++ synapse/storage/databases/main/event_federation.py | 3 +++ synapse/util/ratelimitutils.py | 5 +++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index db4b83a505cd..c078062b0377 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -61,7 +61,12 @@ nested_logging_context, run_in_background, ) -from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace +from synapse.logging.opentracing import ( + log_kv, + start_active_span_from_edu, + trace, + tag_args, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -547,6 +552,8 @@ async def on_room_state_request( return 200, resp + @trace + @tag_args async def on_state_ids_request( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: @@ -569,6 +576,8 @@ async def on_state_ids_request( return 200, resp + @trace + @tag_args async def _on_state_ids_request_compute( self, room_id: str, event_id: str ) -> JsonDict: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5042236742da..13b9018abff7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import trace, tag_args from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -1081,6 +1081,8 @@ async def on_make_knock_request( return event + @trace + @tag_args async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: """Returns the state at the event. i.e. not including said event.""" event = await self.store.get_event(event_id, check_room_id=room_id) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 2f513164cb84..367da640d60c 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -563,6 +563,7 @@ def __init__(self, hs: "HomeServer"): async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: + logger.info("RoomMessageListRestServlet afwefaewfewfew") requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 @@ -592,6 +593,7 @@ async def on_GET( as_client_event=as_client_event, event_filter=event_filter, ) + logger.info("RoomMessageListRestServlet afwefaewfewfew got msgs") return 200, msgs diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index eec55b647857..531a1be0856a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,6 +33,7 @@ from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict +from synapse.logging.opentracing import trace, tag_args from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -126,6 +127,8 @@ async def get_auth_chain( ) return await self.get_events_as_list(event_ids) + @trace + @tag_args async def get_auth_chain_ids( self, room_id: str, diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 6394cc39ac02..95647f106bab 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -21,6 +21,7 @@ from twisted.internet import defer from synapse.api.errors import LimitExceededError +from synapse.logging.opentracing import start_active_span from synapse.config.ratelimiting import FederationRatelimitSettings from synapse.logging.context import ( PreserveLoggingContext, @@ -110,6 +111,9 @@ def ratelimit(self) -> "Iterator[defer.Deferred[None]]": def _on_enter(self, request_id: object) -> "defer.Deferred[None]": time_now = self.clock.time_msec() + wait_span_scope = start_active_span("ratelimit wait") + wait_span_scope.__enter__() + # remove any entries from request_times which aren't within the window self.request_times[:] = [ r for r in self.request_times if time_now - r < self.window_size @@ -162,6 +166,7 @@ def on_wait_finished(_: Any) -> "defer.Deferred[None]": def on_start(r: object) -> object: logger.debug("Ratelimit [%s]: Processing req", id(request_id)) + wait_span_scope.__exit__(None, None, None) self.current_processing.add(request_id) return r From bf5d21bdd141626dd4c5c91e649bbcbd7597063a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Aug 2022 18:29:14 -0500 Subject: [PATCH 2/4] Fix lints and changelog --- changelog.d/13499.misc | 1 + synapse/federation/federation_server.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/storage/databases/main/event_federation.py | 2 +- synapse/util/ratelimitutils.py | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13499.misc diff --git a/changelog.d/13499.misc b/changelog.d/13499.misc new file mode 100644 index 000000000000..99dbcebec8a1 --- /dev/null +++ b/changelog.d/13499.misc @@ -0,0 +1 @@ +Instrument `FederationStateIdsServlet` (`/state_ids`) for understandable traces in Jaeger. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index c078062b0377..75fbc6073df9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -64,8 +64,8 @@ from synapse.logging.opentracing import ( log_kv, start_active_span_from_edu, - trace, tag_args, + trace, ) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 13b9018abff7..6f5ab86ac4b3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace, tag_args +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 531a1be0856a..0bc8401f2be9 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,7 +33,7 @@ from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict -from synapse.logging.opentracing import trace, tag_args +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 95647f106bab..28c401fecc84 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -21,13 +21,13 @@ from twisted.internet import defer from synapse.api.errors import LimitExceededError -from synapse.logging.opentracing import start_active_span from synapse.config.ratelimiting import FederationRatelimitSettings from synapse.logging.context import ( PreserveLoggingContext, make_deferred_yieldable, run_in_background, ) +from synapse.logging.opentracing import start_active_span from synapse.util import Clock if typing.TYPE_CHECKING: From f4e49e307a71fbc706b3d38c6a4a4308cef70d24 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Aug 2022 18:31:08 -0500 Subject: [PATCH 3/4] Remove debug logs --- synapse/rest/client/room.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 367da640d60c..2f513164cb84 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -563,7 +563,6 @@ def __init__(self, hs: "HomeServer"): async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: - logger.info("RoomMessageListRestServlet afwefaewfewfew") requester = await self.auth.get_user_by_req(request, allow_guest=True) pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 @@ -593,7 +592,6 @@ async def on_GET( as_client_event=as_client_event, event_filter=event_filter, ) - logger.info("RoomMessageListRestServlet afwefaewfewfew got msgs") return 200, msgs From effc2b8eb13ab4ae5e359cfbdb188379da083f82 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 11 Aug 2022 16:05:59 -0500 Subject: [PATCH 4/4] Try to better always end the span See https://github.com/matrix-org/synapse/pull/13499#discussion_r943141667 --- synapse/util/ratelimitutils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 28c401fecc84..e1beaec5a3fe 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -111,9 +111,6 @@ def ratelimit(self) -> "Iterator[defer.Deferred[None]]": def _on_enter(self, request_id: object) -> "defer.Deferred[None]": time_now = self.clock.time_msec() - wait_span_scope = start_active_span("ratelimit wait") - wait_span_scope.__enter__() - # remove any entries from request_times which aren't within the window self.request_times[:] = [ r for r in self.request_times if time_now - r < self.window_size @@ -166,7 +163,6 @@ def on_wait_finished(_: Any) -> "defer.Deferred[None]": def on_start(r: object) -> object: logger.debug("Ratelimit [%s]: Processing req", id(request_id)) - wait_span_scope.__exit__(None, None, None) self.current_processing.add(request_id) return r @@ -181,8 +177,11 @@ def on_both(r: object) -> object: # Ensure that we've properly cleaned up. self.sleeping_requests.discard(request_id) self.ready_request_queue.pop(request_id, None) + wait_span_scope.__exit__(None, None, None) return r + wait_span_scope = start_active_span("ratelimit wait") + wait_span_scope.__enter__() ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) return make_deferred_yieldable(ret_defer)