Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 04d4c31

Browse files
committed
Merge pull request #5706 from matrix-org/erikj/add_memberships_to_current_state
2 parents 11d91ee + 5c07c97 commit 04d4c31

File tree

5 files changed

+100
-21
lines changed

5 files changed

+100
-21
lines changed

changelog.d/5706.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reduce database IO usage by optimising queries for current membership.

synapse/storage/_base.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,21 @@
8686
class LoggingTransaction(object):
8787
"""An object that almost-transparently proxies for the 'txn' object
8888
passed to the constructor. Adds logging and metrics to the .execute()
89-
method."""
89+
method.
90+
91+
Args:
92+
txn: The database transaction object to wrap.
93+
name (str): The name of this transaction, used for logging.
94+
database_engine (Sqlite3Engine|PostgresEngine)
95+
after_callbacks (list|None): A list that callbacks will be appended to
96+
that have been added by `call_after` which should be run on
97+
successful completion of the transaction. None indicates that no
98+
callbacks should be allowed to be scheduled to run.
99+
exception_callbacks (list|None): A list that callbacks will be appended
100+
to that have been added by `call_on_exception` which should be run
101+
if the transaction ends with an error. None indicates that no callbacks
102+
should be allowed to be scheduled to run.
103+
"""
90104

91105
__slots__ = [
92106
"txn",
@@ -97,7 +111,7 @@ class LoggingTransaction(object):
97111
]
98112

99113
def __init__(
100-
self, txn, name, database_engine, after_callbacks, exception_callbacks
114+
self, txn, name, database_engine, after_callbacks=None, exception_callbacks=None
101115
):
102116
object.__setattr__(self, "txn", txn)
103117
object.__setattr__(self, "name", name)

synapse/storage/event_push_actions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ def __init__(self, db_conn, hs):
7979
db_conn.cursor(),
8080
name="_find_stream_orderings_for_times_txn",
8181
database_engine=self.database_engine,
82-
after_callbacks=[],
83-
exception_callbacks=[],
8482
)
8583
self._find_stream_orderings_for_times_txn(cur)
8684
cur.close()

synapse/storage/roommember.py

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
from twisted.internet import defer
2525

2626
from synapse.api.constants import EventTypes, Membership
27+
from synapse.metrics.background_process_metrics import run_as_background_process
28+
from synapse.storage._base import LoggingTransaction
2729
from synapse.storage.events_worker import EventsWorkerStore
2830
from synapse.types import get_domain_from_id
2931
from synapse.util.async_helpers import Linearizer
@@ -57,6 +59,47 @@
5759

5860

5961
class RoomMemberWorkerStore(EventsWorkerStore):
62+
def __init__(self, db_conn, hs):
63+
super(RoomMemberWorkerStore, self).__init__(db_conn, hs)
64+
65+
# Is the current_state_events.membership up to date? Or is the
66+
# background update still running?
67+
self._current_state_events_membership_up_to_date = False
68+
69+
txn = LoggingTransaction(
70+
db_conn.cursor(),
71+
name="_check_safe_current_state_events_membership_updated",
72+
database_engine=self.database_engine,
73+
)
74+
self._check_safe_current_state_events_membership_updated_txn(txn)
75+
txn.close()
76+
77+
def _check_safe_current_state_events_membership_updated_txn(self, txn):
78+
"""Checks if it is safe to assume the new current_state_events
79+
membership column is up to date
80+
"""
81+
82+
pending_update = self._simple_select_one_txn(
83+
txn,
84+
table="background_updates",
85+
keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
86+
retcols=["update_name"],
87+
allow_none=True,
88+
)
89+
90+
self._current_state_events_membership_up_to_date = not pending_update
91+
92+
# If the update is still running, reschedule to run.
93+
if pending_update:
94+
self._clock.call_later(
95+
15.0,
96+
run_as_background_process,
97+
"_check_safe_current_state_events_membership_updated",
98+
self.runInteraction,
99+
"_check_safe_current_state_events_membership_updated",
100+
self._check_safe_current_state_events_membership_updated_txn,
101+
)
102+
60103
@cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True)
61104
def get_hosts_in_room(self, room_id, cache_context):
62105
"""Returns the set of all hosts currently in the room
@@ -70,14 +113,23 @@ def get_hosts_in_room(self, room_id, cache_context):
70113
@cached(max_entries=100000, iterable=True)
71114
def get_users_in_room(self, room_id):
72115
def f(txn):
73-
sql = (
74-
"SELECT m.user_id FROM room_memberships as m"
75-
" INNER JOIN current_state_events as c"
76-
" ON m.event_id = c.event_id "
77-
" AND m.room_id = c.room_id "
78-
" AND m.user_id = c.state_key"
79-
" WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?"
80-
)
116+
# If we can assume current_state_events.membership is up to date
117+
# then we can avoid a join, which is a Very Good Thing given how
118+
# frequently this function gets called.
119+
if self._current_state_events_membership_up_to_date:
120+
sql = """
121+
SELECT state_key FROM current_state_events
122+
WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
123+
"""
124+
else:
125+
sql = """
126+
SELECT state_key FROM room_memberships as m
127+
INNER JOIN current_state_events as c
128+
ON m.event_id = c.event_id
129+
AND m.room_id = c.room_id
130+
AND m.user_id = c.state_key
131+
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
132+
"""
81133

82134
txn.execute(sql, (room_id, Membership.JOIN))
83135
return [to_ascii(r[0]) for r in txn]
@@ -99,15 +151,26 @@ def _get_room_summary_txn(txn):
99151
# first get counts.
100152
# We do this all in one transaction to keep the cache small.
101153
# FIXME: get rid of this when we have room_stats
102-
sql = """
103-
SELECT count(*), m.membership FROM room_memberships as m
104-
INNER JOIN current_state_events as c
105-
ON m.event_id = c.event_id
106-
AND m.room_id = c.room_id
107-
AND m.user_id = c.state_key
108-
WHERE c.type = 'm.room.member' AND c.room_id = ?
109-
GROUP BY m.membership
110-
"""
154+
155+
# If we can assume current_state_events.membership is up to date
156+
# then we can avoid a join, which is a Very Good Thing given how
157+
# frequently this function gets called.
158+
if self._current_state_events_membership_up_to_date:
159+
sql = """
160+
SELECT count(*), membership FROM current_state_events
161+
WHERE type = 'm.room.member' AND room_id = ?
162+
GROUP BY membership
163+
"""
164+
else:
165+
sql = """
166+
SELECT count(*), m.membership FROM room_memberships as m
167+
INNER JOIN current_state_events as c
168+
ON m.event_id = c.event_id
169+
AND m.room_id = c.room_id
170+
AND m.user_id = c.state_key
171+
WHERE c.type = 'm.room.member' AND c.room_id = ?
172+
GROUP BY m.membership
173+
"""
111174

112175
txn.execute(sql, (room_id,))
113176
res = {}

synapse/storage/schema/delta/56/current_state_events_membership.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
-- We add membership to current state so that we don't need to join against
1717
-- room_memberships, which can be surprisingly costly (we do such queries
1818
-- very frequently).
19+
-- This will be null for non-membership events and the content.membership key
20+
-- for membership events. (Will also be null for membership events until the
21+
-- background update job has finished).
1922
ALTER TABLE current_state_events ADD membership TEXT;
2023

2124
INSERT INTO background_updates (update_name, progress_json) VALUES

0 commit comments

Comments
 (0)