Skip to content

Commit f1e8d2d

Browse files
authored
Sliding Sync: Speed up getting receipts for initial rooms (#17592)
Let's only pull out the events we care about. Note that the index isn't necessary here, as postgres is happy to scan the set of rooms for the events.
1 parent 1042804 commit f1e8d2d

File tree

4 files changed

+108
-29
lines changed

4 files changed

+108
-29
lines changed

changelog.d/17592.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Correctly track read receipts that should be sent down in experimental sliding sync.

synapse/handlers/sliding_sync.py

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3088,38 +3088,17 @@ async def get_receipts_extension_response(
30883088
# from that room but we only want to include receipts for events
30893089
# in the timeline to avoid bloating and blowing up the sync response
30903090
# as the number of users in the room increases. (this behavior is part of the spec)
3091-
initial_rooms = {
3092-
room_id
3091+
initial_rooms_and_event_ids = [
3092+
(room_id, event.event_id)
30933093
for room_id in initial_rooms
30943094
if room_id in actual_room_response_map
3095-
}
3096-
if initial_rooms:
3097-
initial_receipts = await self.store.get_linearized_receipts_for_rooms(
3098-
room_ids=initial_rooms,
3099-
to_key=to_token.receipt_key,
3095+
for event in actual_room_response_map[room_id].timeline_events
3096+
]
3097+
if initial_rooms_and_event_ids:
3098+
initial_receipts = await self.store.get_linearized_receipts_for_events(
3099+
room_and_event_ids=initial_rooms_and_event_ids,
31003100
)
3101-
3102-
for receipt in initial_receipts:
3103-
relevant_event_ids = {
3104-
event.event_id
3105-
for event in actual_room_response_map[
3106-
receipt["room_id"]
3107-
].timeline_events
3108-
}
3109-
3110-
content = {
3111-
event_id: content_value
3112-
for event_id, content_value in receipt["content"].items()
3113-
if event_id in relevant_event_ids
3114-
}
3115-
if content:
3116-
fetched_receipts.append(
3117-
{
3118-
"type": receipt["type"],
3119-
"room_id": receipt["room_id"],
3120-
"content": content,
3121-
}
3122-
)
3101+
fetched_receipts.extend(initial_receipts)
31233102

31243103
fetched_receipts = ReceiptEventSource.filter_out_private_receipts(
31253104
fetched_receipts, sync_config.user.to_string()

synapse/storage/databases/main/receipts.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
DatabasePool,
4444
LoggingDatabaseConnection,
4545
LoggingTransaction,
46+
make_tuple_in_list_sql_clause,
4647
)
4748
from synapse.storage.engines._base import IsolationLevel
4849
from synapse.storage.util.id_generators import MultiWriterIdGenerator
@@ -481,6 +482,83 @@ def f(
481482
}
482483
return results
483484

485+
async def get_linearized_receipts_for_events(
    self,
    room_and_event_ids: Collection[Tuple[str, str]],
) -> Sequence[JsonMapping]:
    """Fetch the receipts attached to a specific set of events.

    Rows are read from `receipts_linearized` in batches of 1000
    (room ID, event ID) pairs and are then grouped by room.

    Args:
        room_and_event_ids: The `(room_id, event_id)` pairs whose receipts
            should be looked up.

    Returns:
        One receipt dict per room that had at least one matching row. Each
        dict has the keys `type`, `room_id` and `content`, where `content`
        maps event ID -> receipt type -> user ID -> receipt data.
    """

    def _fetch_receipt_rows_txn(
        txn: LoggingTransaction,
        pairs: Collection[Tuple[str, str]],
    ) -> List[Tuple[str, str, str, str, Optional[str], str]]:
        # Restrict the query to the (room_id, event_id) pairs of this batch.
        where_clause, where_args = make_tuple_in_list_sql_clause(
            self.database_engine, ("room_id", "event_id"), pairs
        )

        query = f"""
            SELECT room_id, receipt_type, user_id, event_id, thread_id, data
            FROM receipts_linearized
            WHERE {where_clause}
        """
        txn.execute(query, where_args)
        return txn.fetchall()

    # room_id -> event_id -> receipt_type -> user_id -> receipt data
    receipts_by_room: Dict[str, Dict[str, Dict[str, Dict[str, JsonMapping]]]] = {}

    for pair_batch in batch_iter(room_and_event_ids, 1000):
        rows = await self.db_pool.runInteraction(
            "get_linearized_receipts_for_events",
            _fetch_receipt_rows_txn,
            pair_batch,
        )

        for row in rows:
            room_id, receipt_type, user_id, event_id, thread_id, raw_data = row

            by_event = receipts_by_room.setdefault(room_id, {})
            by_type = by_event.setdefault(event_id, {})
            by_user = by_type.setdefault(receipt_type, {})

            parsed = db_to_json(raw_data)
            if thread_id is not None:
                parsed["thread_id"] = thread_id

            # MSC4102: when a user has both a threaded and an unthreaded
            # receipt on the same event, the unthreaded one wins.
            #   - Nothing stored yet for this user: store this receipt.
            #   - Stored receipt is threaded and this one is unthreaded:
            #     replace it.
            #   - Otherwise keep what is already stored. Some receipts get
            #     dropped this way, but MSC4102 is designed so that only
            #     semantically meaningless ones are lost.
            if user_id not in by_user or (
                "thread_id" in by_user[user_id] and not thread_id
            ):
                by_user[user_id] = parsed

    # Emit one receipt dict per room, in the order rooms were first seen.
    receipt_edus: List[JsonMapping] = []
    for room_id, room_content in receipts_by_room.items():
        receipt_edus.append(
            {"type": EduTypes.RECEIPT, "room_id": room_id, "content": room_content}
        )
    return receipt_edus
561+
484562
@cached(
485563
num_args=2,
486564
)
@@ -996,6 +1074,12 @@ def __init__(
9961074
self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME,
9971075
self._background_receipts_graph_unique_index,
9981076
)
1077+
self.db_pool.updates.register_background_index_update(
1078+
update_name="receipts_room_id_event_id_index",
1079+
index_name="receipts_linearized_event_id",
1080+
table="receipts_linearized",
1081+
columns=("room_id", "event_id"),
1082+
)
9991083

10001084
async def _populate_receipt_event_stream_ordering(
10011085
self, progress: JsonDict, batch_size: int
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--
2+
-- This file is licensed under the Affero General Public License (AGPL) version 3.
3+
--
4+
-- Copyright (C) 2024 New Vector, Ltd
5+
--
6+
-- This program is free software: you can redistribute it and/or modify
7+
-- it under the terms of the GNU Affero General Public License as
8+
-- published by the Free Software Foundation, either version 3 of the
9+
-- License, or (at your option) any later version.
10+
--
11+
-- See the GNU Affero General Public License for more details:
12+
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
13+
14+
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
15+
(8602, 'receipts_room_id_event_id_index', '{}');

0 commit comments

Comments
 (0)