Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 5c07c97

Browse files
authored
Merge pull request #5706 from matrix-org/erikj/add_memberships_to_current_state
Add membership column to current_state_events table
2 parents 7b8bc61 + bd2e1a2 commit 5c07c97

File tree

8 files changed

+195
-41
lines changed

8 files changed

+195
-41
lines changed

changelog.d/5706.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reduce database IO usage by optimising queries for current membership.

synapse/storage/_base.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,21 @@
8686
class LoggingTransaction(object):
8787
"""An object that almost-transparently proxies for the 'txn' object
8888
passed to the constructor. Adds logging and metrics to the .execute()
89-
method."""
89+
method.
90+
91+
Args:
92+
txn: The database transaction object to wrap.
93+
name (str): The name of this transaction, used for logging.
94+
database_engine (Sqlite3Engine|PostgresEngine)
95+
after_callbacks(list|None): A list that callbacks will be appended to
96+
that have been added by `call_after` which should be run on
97+
successful completion of the transaction. None indicates that no
98+
callbacks should be allowed to be scheduled to run.
99+
exception_callbacks(list|None): A list that callbacks will be appended
100+
to that have been added by `call_on_exception` which should be run
101+
if transaction ends with an error. None indicates that no callbacks
102+
should be allowed to be scheduled to run.
103+
"""
90104

91105
__slots__ = [
92106
"txn",
@@ -97,7 +111,7 @@ class LoggingTransaction(object):
97111
]
98112

99113
def __init__(
100-
self, txn, name, database_engine, after_callbacks, exception_callbacks
114+
self, txn, name, database_engine, after_callbacks=None, exception_callbacks=None
101115
):
102116
object.__setattr__(self, "txn", txn)
103117
object.__setattr__(self, "name", name)

synapse/storage/event_push_actions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ def __init__(self, db_conn, hs):
7979
db_conn.cursor(),
8080
name="_find_stream_orderings_for_times_txn",
8181
database_engine=self.database_engine,
82-
after_callbacks=[],
83-
exception_callbacks=[],
8482
)
8583
self._find_stream_orderings_for_times_txn(cur)
8684
cur.close()

synapse/storage/events.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -918,8 +918,6 @@ def _persist_events_txn(
918918
min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering
919919
max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering
920920

921-
self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)
922-
923921
self._update_forward_extremities_txn(
924922
txn,
925923
new_forward_extremities=new_forward_extremeties,
@@ -993,6 +991,10 @@ def _persist_events_txn(
993991
backfilled=backfilled,
994992
)
995993

994+
# We call this last as it assumes we've inserted the events into
995+
# room_memberships, where applicable.
996+
self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)
997+
996998
def _update_current_state_txn(self, txn, state_delta_by_room, stream_id):
997999
for room_id, current_state_tuple in iteritems(state_delta_by_room):
9981000
to_delete, to_insert = current_state_tuple
@@ -1062,16 +1064,16 @@ def _update_current_state_txn(self, txn, state_delta_by_room, stream_id):
10621064
),
10631065
)
10641066

1065-
self._simple_insert_many_txn(
1066-
txn,
1067-
table="current_state_events",
1068-
values=[
1069-
{
1070-
"event_id": ev_id,
1071-
"room_id": room_id,
1072-
"type": key[0],
1073-
"state_key": key[1],
1074-
}
1067+
# We include the membership in the current state table, hence we do
1068+
# a lookup when we insert. This assumes that all events have already
1069+
# been inserted into room_memberships.
1070+
txn.executemany(
1071+
"""INSERT INTO current_state_events
1072+
(room_id, type, state_key, event_id, membership)
1073+
VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
1074+
""",
1075+
[
1076+
(room_id, key[0], key[1], ev_id, ev_id)
10751077
for key, ev_id in iteritems(to_insert)
10761078
],
10771079
)

synapse/storage/prepare_database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
# Remember to update this number every time a change is made to database
2929
# schema files, so the users will be informed on server restarts.
30-
SCHEMA_VERSION = 55
30+
SCHEMA_VERSION = 56
3131

3232
dir_path = os.path.abspath(os.path.dirname(__file__))
3333

synapse/storage/roommember.py

Lines changed: 134 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
from twisted.internet import defer
2525

2626
from synapse.api.constants import EventTypes, Membership
27+
from synapse.metrics.background_process_metrics import run_as_background_process
28+
from synapse.storage._base import LoggingTransaction
2729
from synapse.storage.events_worker import EventsWorkerStore
2830
from synapse.types import get_domain_from_id
2931
from synapse.util.async_helpers import Linearizer
@@ -53,9 +55,51 @@
5355
MemberSummary = namedtuple("MemberSummary", ("members", "count"))
5456

5557
_MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
58+
_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership"
5659

5760

5861
class RoomMemberWorkerStore(EventsWorkerStore):
62+
def __init__(self, db_conn, hs):
63+
super(RoomMemberWorkerStore, self).__init__(db_conn, hs)
64+
65+
# Is the current_state_events.membership up to date? Or is the
66+
# background update still running?
67+
self._current_state_events_membership_up_to_date = False
68+
69+
txn = LoggingTransaction(
70+
db_conn.cursor(),
71+
name="_check_safe_current_state_events_membership_updated",
72+
database_engine=self.database_engine,
73+
)
74+
self._check_safe_current_state_events_membership_updated_txn(txn)
75+
txn.close()
76+
77+
def _check_safe_current_state_events_membership_updated_txn(self, txn):
78+
"""Checks if it is safe to assume the new current_state_events
79+
membership column is up to date
80+
"""
81+
82+
pending_update = self._simple_select_one_txn(
83+
txn,
84+
table="background_updates",
85+
keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
86+
retcols=["update_name"],
87+
allow_none=True,
88+
)
89+
90+
self._current_state_events_membership_up_to_date = not pending_update
91+
92+
# If the update is still running, reschedule this check to run again later.
93+
if pending_update:
94+
self._clock.call_later(
95+
15.0,
96+
run_as_background_process,
97+
"_check_safe_current_state_events_membership_updated",
98+
self.runInteraction,
99+
"_check_safe_current_state_events_membership_updated",
100+
self._check_safe_current_state_events_membership_updated_txn,
101+
)
102+
59103
@cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True)
60104
def get_hosts_in_room(self, room_id, cache_context):
61105
"""Returns the set of all hosts currently in the room
@@ -69,14 +113,23 @@ def get_hosts_in_room(self, room_id, cache_context):
69113
@cached(max_entries=100000, iterable=True)
70114
def get_users_in_room(self, room_id):
71115
def f(txn):
72-
sql = (
73-
"SELECT m.user_id FROM room_memberships as m"
74-
" INNER JOIN current_state_events as c"
75-
" ON m.event_id = c.event_id "
76-
" AND m.room_id = c.room_id "
77-
" AND m.user_id = c.state_key"
78-
" WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?"
79-
)
116+
# If we can assume current_state_events.membership is up to date
117+
# then we can avoid a join, which is a Very Good Thing given how
118+
# frequently this function gets called.
119+
if self._current_state_events_membership_up_to_date:
120+
sql = """
121+
SELECT state_key FROM current_state_events
122+
WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
123+
"""
124+
else:
125+
sql = """
126+
SELECT state_key FROM room_memberships as m
127+
INNER JOIN current_state_events as c
128+
ON m.event_id = c.event_id
129+
AND m.room_id = c.room_id
130+
AND m.user_id = c.state_key
131+
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
132+
"""
80133

81134
txn.execute(sql, (room_id, Membership.JOIN))
82135
return [to_ascii(r[0]) for r in txn]
@@ -98,15 +151,26 @@ def _get_room_summary_txn(txn):
98151
# first get counts.
99152
# We do this all in one transaction to keep the cache small.
100153
# FIXME: get rid of this when we have room_stats
101-
sql = """
102-
SELECT count(*), m.membership FROM room_memberships as m
103-
INNER JOIN current_state_events as c
104-
ON m.event_id = c.event_id
105-
AND m.room_id = c.room_id
106-
AND m.user_id = c.state_key
107-
WHERE c.type = 'm.room.member' AND c.room_id = ?
108-
GROUP BY m.membership
109-
"""
154+
155+
# If we can assume current_state_events.membership is up to date
156+
# then we can avoid a join, which is a Very Good Thing given how
157+
# frequently this function gets called.
158+
if self._current_state_events_membership_up_to_date:
159+
sql = """
160+
SELECT count(*), membership FROM current_state_events
161+
WHERE type = 'm.room.member' AND room_id = ?
162+
GROUP BY membership
163+
"""
164+
else:
165+
sql = """
166+
SELECT count(*), m.membership FROM room_memberships as m
167+
INNER JOIN current_state_events as c
168+
ON m.event_id = c.event_id
169+
AND m.room_id = c.room_id
170+
AND m.user_id = c.state_key
171+
WHERE c.type = 'm.room.member' AND c.room_id = ?
172+
GROUP BY m.membership
173+
"""
110174

111175
txn.execute(sql, (room_id,))
112176
res = {}
@@ -224,7 +288,7 @@ def _get_rooms_for_user_where_membership_is_txn(
224288
results = []
225289
if membership_list:
226290
where_clause = "user_id = ? AND (%s) AND forgotten = 0" % (
227-
" OR ".join(["membership = ?" for _ in membership_list]),
291+
" OR ".join(["m.membership = ?" for _ in membership_list]),
228292
)
229293

230294
args = [user_id]
@@ -453,8 +517,8 @@ def is_host_joined(self, room_id, host):
453517

454518
sql = """
455519
SELECT state_key FROM current_state_events AS c
456-
INNER JOIN room_memberships USING (event_id)
457-
WHERE membership = 'join'
520+
INNER JOIN room_memberships AS m USING (event_id)
521+
WHERE m.membership = 'join'
458522
AND type = 'm.room.member'
459523
AND c.room_id = ?
460524
AND state_key LIKE ?
@@ -602,6 +666,10 @@ def __init__(self, db_conn, hs):
602666
self.register_background_update_handler(
603667
_MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile
604668
)
669+
self.register_background_update_handler(
670+
_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
671+
self._background_current_state_membership,
672+
)
605673

606674
def _store_room_members_txn(self, txn, events, backfilled):
607675
"""Store a room member in the database.
@@ -781,6 +849,52 @@ def add_membership_profile_txn(txn):
781849

782850
defer.returnValue(result)
783851

852+
@defer.inlineCallbacks
853+
def _background_current_state_membership(self, progress, batch_size):
854+
"""Update the new membership column on current_state_events.
855+
"""
856+
857+
if "rooms" not in progress:
858+
rooms = yield self._simple_select_onecol(
859+
table="current_state_events",
860+
keyvalues={},
861+
retcol="DISTINCT room_id",
862+
desc="_background_current_state_membership_get_rooms",
863+
)
864+
progress["rooms"] = rooms
865+
866+
rooms = progress["rooms"]
867+
868+
def _background_current_state_membership_txn(txn):
869+
processed = 0
870+
while rooms and processed < batch_size:
871+
sql = """
872+
UPDATE current_state_events AS c
873+
SET membership = (
874+
SELECT membership FROM room_memberships
875+
WHERE event_id = c.event_id
876+
)
877+
WHERE room_id = ?
878+
"""
879+
txn.execute(sql, (rooms.pop(),))
880+
processed += txn.rowcount
881+
882+
self._background_update_progress_txn(
883+
txn, _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME, progress
884+
)
885+
886+
return processed
887+
888+
result = yield self.runInteraction(
889+
"_background_current_state_membership_update",
890+
_background_current_state_membership_txn,
891+
)
892+
893+
if not rooms:
894+
yield self._end_background_update(_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME)
895+
896+
defer.returnValue(result)
897+
784898

785899
class _JoinedHostsCache(object):
786900
"""Cache for joined hosts in a room that is optimised to handle updates
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* Copyright 2019 The Matrix.org Foundation C.I.C.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
-- We add membership to current state so that we don't need to join against
17+
-- room_memberships, which can be surprisingly costly (we do such queries
18+
-- very frequently).
19+
-- This will be null for non-membership events, and the value of the content.membership key
20+
-- for membership events. (Will also be null for membership events until the
21+
-- background update job has finished).
22+
ALTER TABLE current_state_events ADD membership TEXT;
23+
24+
INSERT INTO background_updates (update_name, progress_json) VALUES
25+
('current_state_events_membership', '{}');

synapse/storage/user_directory.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -618,15 +618,15 @@ def get_rooms_in_common_for_users(self, user_id, other_user_id):
618618
sql = """
619619
SELECT room_id FROM (
620620
SELECT c.room_id FROM current_state_events AS c
621-
INNER JOIN room_memberships USING (event_id)
621+
INNER JOIN room_memberships AS m USING (event_id)
622622
WHERE type = 'm.room.member'
623-
AND membership = 'join'
623+
AND m.membership = 'join'
624624
AND state_key = ?
625625
) AS f1 INNER JOIN (
626626
SELECT c.room_id FROM current_state_events AS c
627-
INNER JOIN room_memberships USING (event_id)
627+
INNER JOIN room_memberships AS m USING (event_id)
628628
WHERE type = 'm.room.member'
629-
AND membership = 'join'
629+
AND m.membership = 'join'
630630
AND state_key = ?
631631
) f2 USING (room_id)
632632
"""

0 commit comments

Comments
 (0)