2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
- package index
5
+ /*
6
+ # Merging indexes
7
+
8
+ To merge two indexes A and B (newer) into a combined index C:
9
+
10
+ Load the path list from B and determine for each path the docid ranges that it will replace in A.
11
+
12
+ Read A's and B's name lists together, merging them into C's name list.
13
+ Discard the identified ranges from A during the merge.
14
+ Also during the merge, record two mappings: from A's docids to C's docids, and from B's docids to C's docids.
15
+ Both mappings can be summarized in a table like
16
+
17
+ 10-14 maps to 20-24
18
+ 15-24 is deleted
19
+ 25-34 maps to 40-49
6
20
7
- // Merging indexes.
8
- //
9
- // To merge two indexes A and B (newer) into a combined index C:
10
- //
11
- // Load the path list from B and determine for each path the docid ranges
12
- // that it will replace in A.
13
- //
14
- // Read A's and B's name lists together, merging them into C's name list.
15
- // Discard the identified ranges from A during the merge. Also during the merge,
16
- // record the mapping from A's docids to C's docids, and also the mapping from
17
- // B's docids to C's docids. Both mappings can be summarized in a table like
18
- //
19
- // 10-14 map to 20-24
20
- // 15-24 is deleted
21
- // 25-34 maps to 40-49
22
- //
23
- // The number of ranges will be at most the combined number of paths.
24
- // Also during the merge, write the name index to a temporary file as usual.
25
- //
26
- // Now merge the posting lists (this is why they begin with the trigram).
27
- // During the merge, translate the docid numbers to the new C docid space.
28
- // Also during the merge, write the posting list index to a temporary file as usual.
29
- //
30
- // Copy the name index and posting list index into C's index and write the trailer.
31
- // Rename C's index onto the new index.
21
+ The number of ranges will be at most the combined number of paths.
22
+ Also during the merge, write the name index to a temporary file as usual.
23
+
24
+ Now merge the posting lists (this is why they begin with the trigram).
25
+ During the merge, translate the docid numbers to the new C docid space.
26
+ Also during the merge, write the posting list index to a temporary file as usual.
27
+
28
+ Copy the name index and posting list index into C's index and write the trailer.
29
+ Rename C's index onto the new index.
30
+ */
31
+
32
+ package index
32
33
33
34
import (
34
35
"encoding/binary"
36
+ "log"
35
37
"os"
36
38
"strings"
37
39
)
@@ -164,6 +166,44 @@ func Merge(dst, src1, src2 string) {
164
166
}
165
167
nameIndexFile .writeUint32 (ix3 .offset ())
166
168
169
+ // To merge blobs, create an array of slices in order of the new posting IDs, and then write them to the destination.
170
+ blobs := [][]byte {}
171
+ new = 0
172
+ mi1 = 0
173
+ mi2 = 0
174
+ for new < numName {
175
+ var blob []byte
176
+ if mi1 < len (map1 ) && map1 [mi1 ].new == new {
177
+ for i := map1 [mi1 ].lo ; i < map1 [mi1 ].hi ; i ++ {
178
+ blob = ix1 .Data (i )
179
+ new ++
180
+ }
181
+ mi1 ++
182
+ } else if mi2 < len (map2 ) && map2 [mi2 ].new == new {
183
+ for i := map2 [mi2 ].lo ; i < map2 [mi2 ].hi ; i ++ {
184
+ blob = ix2 .Data (i )
185
+ new ++
186
+ }
187
+ mi2 ++
188
+ } else {
189
+ panic ("merge: inconsistent index" )
190
+ }
191
+ blobs = append (blobs , blob )
192
+ }
193
+
194
+ blobFile := bufCreate ("" )
195
+ var blobOffsets , blobSizes []uint32 // TODO: just make a struct
196
+ for _ , blob := range blobs {
197
+ off := blobFile .offset ()
198
+ size := len (blob )
199
+ _ , err := blobFile .file .Write (blob )
200
+ if err != nil {
201
+ log .Fatalf ("writing %s: %v" , blobFile .name , err )
202
+ }
203
+ blobOffsets = append (blobOffsets , off )
204
+ blobSizes = append (blobSizes , uint32 (size ))
205
+ }
206
+
167
207
// Merged list of posting lists.
168
208
postData := ix3 .offset ()
169
209
var r1 postMapReader
@@ -211,6 +251,9 @@ func Merge(dst, src1, src2 string) {
211
251
}
212
252
}
213
253
254
+ blobData := ix3 .offset ()
255
+ copyFile (ix3 , blobFile )
256
+
214
257
// Name index
215
258
nameIndex := ix3 .offset ()
216
259
copyFile (ix3 , nameIndexFile )
@@ -219,11 +262,19 @@ func Merge(dst, src1, src2 string) {
219
262
postIndex := ix3 .offset ()
220
263
copyFile (ix3 , w .postIndexFile )
221
264
265
+ blobIndex := ix3 .offset ()
266
+ for i , off := range blobOffsets {
267
+ ix3 .writeUint32 (off )
268
+ ix3 .writeUint32 (blobSizes [i ])
269
+ }
270
+
222
271
ix3 .writeUint32 (pathData )
223
272
ix3 .writeUint32 (nameData )
224
273
ix3 .writeUint32 (postData )
274
+ ix3 .writeUint32 (blobData )
225
275
ix3 .writeUint32 (nameIndex )
226
276
ix3 .writeUint32 (postIndex )
277
+ ix3 .writeUint32 (blobIndex )
227
278
ix3 .writeString (trailerMagic )
228
279
ix3 .flush ()
229
280
0 commit comments