@@ -25,40 +25,62 @@ use super::StateGroupEntry;
25
25
/// specific room.
26
26
///
27
27
/// - Connects to the database
28
- /// - Fetches rows with group id lower than max
28
+ /// - Fetches the first [groups_to_compress] rows with group id after [min_state_group]
29
29
/// - Recursively searches for missing predecessors and adds those
30
30
///
31
+ /// Returns the state_group map and the id of the last group that was used
32
+ ///
31
33
/// # Arguments
32
34
///
33
- /// * `room_id` - The ID of the room in the database
34
- /// * `db_url` - The URL of a Postgres database. This should be of the
35
- /// form: "postgresql://user:pass@domain:port/database"
36
- /// * `max_state_group` - If specified, then only fetch the entries for state
37
- /// groups lower than or equal to this number. (N.B. all
38
- /// predecessors are also fetched)
35
+ /// * `room_id` - The ID of the room in the database
36
+ /// * `db_url` - The URL of a Postgres database. This should be of the
37
+ /// form: "postgresql://user:pass@domain:port/database"
38
+ /// * `min_state_group` - If specified, then only fetch the entries for state
39
+ /// groups strictly greater than this number. It
40
+ /// also requires groups_to_compress to be specified
41
+ /// * `groups_to_compress` - The number of groups to get from the database before stopping
42
+ /// * `max_state_group` - If specified, then only fetch the entries for state
43
+ /// groups lower than or equal to this number.
39
44
pub fn get_data_from_db (
40
45
db_url : & str ,
41
46
room_id : & str ,
47
+ min_state_group : Option < i64 > ,
48
+ groups_to_compress : Option < i64 > ,
42
49
max_state_group : Option < i64 > ,
43
- ) -> BTreeMap < i64 , StateGroupEntry > {
50
+ ) -> ( BTreeMap < i64 , StateGroupEntry > , i64 ) {
51
+ // connect to the database
44
52
let mut builder = SslConnector :: builder ( SslMethod :: tls ( ) ) . unwrap ( ) ;
45
53
builder. set_verify ( SslVerifyMode :: NONE ) ;
46
54
let connector = MakeTlsConnector :: new ( builder. build ( ) ) ;
47
55
48
56
let mut client = Client :: connect ( db_url, connector) . unwrap ( ) ;
49
57
50
- let mut state_group_map = get_initial_data_from_db ( & mut client, room_id, max_state_group) ;
58
+ // Search for the group id of the groups_to_compress'th group after min_state_group
59
+ // If this is saved, then the compressor can continue by having min_state_group being
60
+ // set to this maximum
61
+ let max_group_found = find_max_group (
62
+ & mut client,
63
+ room_id,
64
+ min_state_group,
65
+ groups_to_compress,
66
+ max_state_group,
67
+ ) ;
68
+
69
+ let mut state_group_map =
70
+ get_initial_data_from_db ( & mut client, room_id, min_state_group, max_group_found) ;
51
71
52
72
println ! ( "Got initial state from database. Checking for any missing state groups..." ) ;
53
73
54
74
// Due to reasons some of the state groups appear in the edges table, but
55
- // not in the state_groups_state table. This means they don't get included
56
- // in our DB queries, so we have to fetch any missing groups explicitly.
75
+ // not in the state_groups_state table.
76
+ //
77
+ // Also it is likely that the predecessor of a node will not be within the
78
+ // chunk that was specified by min_state_group and groups_to_compress.
79
+ // This means they don't get included in our DB queries, so we have to fetch
80
+ // any missing groups explicitly.
81
+ //
57
82
// Since the returned groups may themselves reference groups we don't have,
58
83
// we need to do this recursively until we don't find any more missing.
59
- //
60
- // N.B. This does NOT currently fetch the deltas for the missing groups!
61
- // By carefully chosen max_state_group this might cause issues...?
62
84
loop {
63
85
let mut missing_sgs: Vec < _ > = state_group_map
64
86
. iter ( )
@@ -76,41 +98,92 @@ pub fn get_data_from_db(
76
98
. collect ( ) ;
77
99
78
100
if missing_sgs. is_empty ( ) {
79
- println ! ( "No missing state groups" ) ;
101
+ // println!("No missing state groups");
80
102
break ;
81
103
}
82
104
83
105
missing_sgs. sort_unstable ( ) ;
84
106
missing_sgs. dedup ( ) ;
85
107
86
- println ! ( "Missing {} state groups" , missing_sgs. len( ) ) ;
108
+ // println!("Missing {} state groups", missing_sgs.len());
87
109
88
- let map = get_missing_from_db ( & mut client, & missing_sgs) ;
89
- state_group_map. extend ( map. into_iter ( ) ) ;
110
+ // find state groups not picked up already and add them to the map
111
+ let map = get_missing_from_db ( & mut client, & missing_sgs, min_state_group, max_group_found) ;
112
+ for ( k, v) in map {
113
+ state_group_map. entry ( k) . or_insert ( v) ;
114
+ }
90
115
}
91
116
92
- state_group_map
117
+ ( state_group_map, max_group_found)
118
+ }
119
+
120
+ /// Returns the group ID of the last group to be compressed
121
+ ///
122
+ /// This can be saved so that future runs of the compressor only
123
+ /// continue from after this point
124
+ ///
125
+ /// # Arguments
126
+ ///
127
+ /// * `client` - A Postgres client to make requests with
128
+ /// * `room_id` - The ID of the room in the database
129
+ /// * `min_state_group` - The lower limit (non inclusive) of group id's to compress
130
+ /// * 'groups_to_compress' - How many groups to compress
131
+ /// * `max_state_group` - The upper bound on what this method can return
132
+ fn find_max_group (
133
+ client : & mut Client ,
134
+ room_id : & str ,
135
+ min_state_group : Option < i64 > ,
136
+ groups_to_compress : Option < i64 > ,
137
+ max_state_group : Option < i64 > ,
138
+ ) -> i64 {
139
+ // Get list of state_id's in a certain room
140
+ let mut query_chunk_of_ids = "SELECT id FROM state_groups WHERE room_id = $1" . to_string ( ) ;
141
+ let params: Vec < & ( dyn ToSql + Sync ) > ;
142
+
143
+ if let Some ( max) = max_state_group {
144
+ query_chunk_of_ids = format ! ( "{} AND id <= {}" , query_chunk_of_ids, max)
145
+ }
146
+
147
+ // Adds additional constraint if a groups_to_compress has been specified
148
+ if min_state_group. is_some ( ) && groups_to_compress. is_some ( ) {
149
+ params = vec ! [ & room_id, & min_state_group, & groups_to_compress] ;
150
+ query_chunk_of_ids = format ! ( r"{} AND id > $2 LIMIT $3" , query_chunk_of_ids) ;
151
+ } else {
152
+ params = vec ! [ & room_id] ;
153
+ query_chunk_of_ids = format ! ( r"{} ORDER BY id DESC LIMIT 1" , query_chunk_of_ids) ;
154
+ }
155
+
156
+ let sql_query = format ! (
157
+ "SELECT id FROM ({}) AS ids ORDER BY ids.id DESC LIMIT 1" ,
158
+ query_chunk_of_ids
159
+ ) ;
160
+ let final_row = client. query ( sql_query. as_str ( ) , & params) . unwrap ( ) ;
161
+
162
+ final_row. last ( ) . unwrap ( ) . get ( 0 )
93
163
}
94
164
95
165
/// Fetch the entries in state_groups_state and immediate predecessors for
96
166
/// a specific room.
97
167
///
98
- /// - Fetches rows with group id lower than max
168
+ /// - Fetches rows with min_state_group < group id <= max_group_found (or every row for the room if no minimum is given)
99
169
/// - Stores the group id, predecessor id and deltas into a map
170
+ /// - Returns the map
100
171
///
101
172
/// # Arguments
102
173
///
103
174
/// * `client` - A Postgres client to make requests with
104
175
/// * `room_id` - The ID of the room in the database
105
- /// * `max_state_group` - If specified, then only fetch the entries for state
106
- /// groups lower than or equal to this number. (N.B. doesn't
107
- /// fetch IMMEDIATE predecessors if ID is above this number)
176
+ /// * `min_state_group` - If specified, then only fetch the entries for state
177
+ /// groups strictly greater than this number. It
178
+ /// also requires groups_to_compress to be specified
179
+ /// * `max_group_found` - The upper limit on state_groups ids to get from the database
108
180
fn get_initial_data_from_db (
109
181
client : & mut Client ,
110
182
room_id : & str ,
111
- max_state_group : Option < i64 > ,
183
+ min_state_group : Option < i64 > ,
184
+ max_group_found : i64 ,
112
185
) -> BTreeMap < i64 , StateGroupEntry > {
113
- // Query to get id, predecessor and delta for each state group
186
+ // Query to get id, predecessor and deltas for each state group
114
187
let sql = r#"
115
188
SELECT m.id, prev_state_group, type, state_key, s.event_id
116
189
FROM state_groups AS m
@@ -119,18 +192,21 @@ fn get_initial_data_from_db(
119
192
WHERE m.room_id = $1
120
193
"# ;
121
194
122
- // Adds additional constraint if a max_state_group has been specified
123
- // Then sends query to the datatbase
124
- let mut rows = if let Some ( s) = max_state_group {
125
- let params: Vec < & dyn ToSql > = vec ! [ & room_id, & s] ;
126
- client. query_raw ( format ! ( r"{} AND m.id <= $2" , sql) . as_str ( ) , params)
195
+ // Adds additional constraint if minimum state_group has been specified.
196
+ // note that the maximum group only affects queries if there is also a minimum
197
+ // otherwise it is assumed that ALL groups should be fetched
198
+ let mut rows = if let Some ( min) = min_state_group {
199
+ let params: Vec < & dyn ToSql > = vec ! [ & room_id, & min, & max_group_found] ;
200
+ client. query_raw (
201
+ format ! ( r"{} AND m.id > $2 AND m.id <= $3" , sql) . as_str ( ) ,
202
+ params,
203
+ )
127
204
} else {
128
205
client. query_raw ( sql, & [ room_id] )
129
206
}
130
207
. unwrap ( ) ;
131
208
132
209
// Copy the data from the database into a map
133
-
134
210
let mut state_group_map: BTreeMap < i64 , StateGroupEntry > = BTreeMap :: new ( ) ;
135
211
136
212
let pb = ProgressBar :: new_spinner ( ) ;
@@ -143,8 +219,10 @@ fn get_initial_data_from_db(
143
219
// The row in the map to copy the data to
144
220
let entry = state_group_map. entry ( row. get ( 0 ) ) . or_default ( ) ;
145
221
146
- // Save the predecessor (this may already be there)
222
+ // Save the predecessor and mark for compression (this may already be there)
223
+ // TODO: slightly fewer redundant rewrites
147
224
entry. prev_state_group = row. get ( 1 ) ;
225
+ entry. in_range = true ;
148
226
149
227
// Copy the single delta from the predecessor stored in this row
150
228
if let Some ( etype) = row. get :: < _ , Option < String > > ( 2 ) {
@@ -172,34 +250,57 @@ fn get_initial_data_from_db(
172
250
///
173
251
/// * `client` - A Postgres client to make requests with
174
252
/// * `missing_sgs` - An array of missing state_group ids
175
- fn get_missing_from_db ( client : & mut Client , missing_sgs : & [ i64 ] ) -> BTreeMap < i64 , StateGroupEntry > {
176
- let mut rows = client
177
- . query_raw (
178
- r#"
179
- SELECT state_group, prev_state_group
180
- FROM state_group_edges
181
- WHERE state_group = ANY($1)
182
- "# ,
183
- & [ missing_sgs] ,
184
- )
185
- . unwrap ( ) ;
253
+ /// * 'min_state_group' - Minimum state_group id to mark as in range
254
+ /// * 'max_group_found' - Maximum state_group id to mark as in range
255
+ fn get_missing_from_db (
256
+ client : & mut Client ,
257
+ missing_sgs : & [ i64 ] ,
258
+ min_state_group : Option < i64 > ,
259
+ max_group_found : i64 ,
260
+ ) -> BTreeMap < i64 , StateGroupEntry > {
261
+ // "Due to reasons" it is possible that some states only appear in edges table and not in state_groups table
262
+ // so since we know the IDs we're looking for as they are the missing predecessors, we can find them by
263
+ // left joining onto the edges table (instead of the state_group table!)
264
+ let sql = r#"
265
+ SELECT target.prev_state_group, source.prev_state_group, state.type, state.state_key, state.event_id
266
+ FROM state_group_edges AS target
267
+ LEFT JOIN state_group_edges AS source ON (target.prev_state_group = source.state_group)
268
+ LEFT JOIN state_groups_state AS state ON (target.prev_state_group = state.state_group)
269
+ WHERE target.prev_state_group = ANY($1)
270
+ "# ;
271
+
272
+ let mut rows = client. query_raw ( sql, & [ missing_sgs] ) . unwrap ( ) ;
186
273
187
- // initialise the map with empty entries (the missing group may not
188
- // have a prev_state_group either)
189
- let mut state_group_map: BTreeMap < i64 , StateGroupEntry > = missing_sgs
190
- . iter ( )
191
- . map ( |sg| ( * sg, StateGroupEntry :: default ( ) ) )
192
- . collect ( ) ;
274
+ let mut state_group_map: BTreeMap < i64 , StateGroupEntry > = BTreeMap :: new ( ) ;
193
275
194
276
while let Some ( row) = rows. next ( ) . unwrap ( ) {
195
- let state_group = row. get ( 0 ) ;
196
- let entry = state_group_map. get_mut ( & state_group) . unwrap ( ) ;
277
+ let id = row. get ( 0 ) ;
278
+ // The row in the map to copy the data to
279
+ let entry = state_group_map. entry ( id) . or_default ( ) ;
280
+
281
+ // Save the predecessor and mark for compression (this may already be there)
282
+ // Also may well not exist!
197
283
entry. prev_state_group = row. get ( 1 ) ;
284
+ if let Some ( min) = min_state_group {
285
+ if min < id && id <= max_group_found {
286
+ entry. in_range = true
287
+ }
288
+ }
289
+
290
+ // Copy the single delta from the predecessor stored in this row
291
+ if let Some ( etype) = row. get :: < _ , Option < String > > ( 2 ) {
292
+ entry. state_map . insert (
293
+ & etype,
294
+ & row. get :: < _ , String > ( 3 ) ,
295
+ row. get :: < _ , String > ( 4 ) . into ( ) ,
296
+ ) ;
297
+ }
198
298
}
199
299
200
300
state_group_map
201
301
}
202
302
303
+ // TODO: find a library that has an existing safe postgres escape function
203
304
/// Helper function that escapes the wrapped text when writing SQL
204
305
pub struct PGEscape < ' a > ( pub & ' a str ) ;
205
306
0 commit comments