-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsbvtree.h
More file actions
307 lines (228 loc) · 8.37 KB
/
Copy pathsbvtree.h
File metadata and controls
307 lines (228 loc) · 8.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/***************************************************************************
* DynFMI - Dynamic FM-Index for a Collection of Texts *
* Copyright (C) 2006 Wolfgang Gerlach *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
***************************************************************************/
// Implementation of the Dynamic Bit Vector with Indels problem
// space: O(n) time: O(log(n))
// papers: V. Maekinen, G. Navarro. Dynamic Entropy-Compressed Sequences and Full-Text
// Indexes. CPM 2006, Chapter 3.4 Dynamic Structures for Bit Vectors
// also: W.-L. Chan, W.-K. Hon, and T.-W. Lam. Compressed index for a dynamic collection
// of texts. In Proc. CPM04, LNCS 3109, pages 445-456, 2004
#ifndef GUARD_SBVTree
#define GUARD_SBVTree
#include <iostream>
#include <bitset>
#include <cstdlib>
#include <stdint.h>
//#include <map>
//#include <stack>
#include <cmath>
#include <fstream>
//#include <vector>
#include <cstdio>
#include "rbtree.h"
#include "types.h"
using namespace rbtree;
// Size of a block in bits.
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 16
#endif
// Type that fits best to store a block
#ifndef uint_block
#define uint_block uint16_t
#endif
// Maximum value on BLOCK_SIZE bits.
#ifndef BS_MAX
#define BS_MAX 0xFFFF
#endif
// logarithm (base 2) of the block size
// we use this for the division by the block size (with bit shiftings).
// if one does not want a power of two as a block size, then the code
// using LOG_BS and log_bs has to be slightly modified (probably using an
// integer division rather than a bit shifting).
#ifndef LOG_BS
#define LOG_BS 4
#endif
// Number of blocks stored in a node.
#ifndef NB_BLOCKS
#define NB_BLOCKS (4*BLOCK_SIZE)
#endif
/* MIN_MERGE*superblock_size corresponds to the minimal number of bits
* under which we merge two nodes */
#define MIN_MERGE 0.5
/*
* MIN_EXCHANGE*superblock_size corresponds to the minimal number of
* bits we exchange under merge and split (during deleteBit).
* Should always be > 32
*/
#define MIN_EXCHANGE 0.1
/* When merging two nodes, we do not want to have a full node (ie.
* a node that has superblock_size bits). MAX_AFTER_MERGE is the maximal
* percentage of the superblock_size after merging.
*/
#define MAX_AFTER_MERGE 0.9
const uint_block block_size = BLOCK_SIZE;
const uint nb_blocks = NB_BLOCKS;
const uint superblock_size = nb_blocks*block_size;
const uint log_bs = LOG_BS;
/* class SBVNode; */
/* class SBVTree; */
#define RANK 0
#define POSITIONS 1
namespace sbvtree {
/* typedef rbtree::RBNode RBNode; */
/* typedef rbtree::RBTree RBTree; */
void callUpdateCounters(RBNode *n, RBTree *T);
void callUpdateCountersOnPathToRoot(RBNode *n, RBTree *T);
class SBVNode : public RBNode
{
public:
size_t myPositions; // 4*4 bytes = 16 bytes
size_t myRank;
size_t subtreePositions; //number of positions stored in the subtree rooted at this node
size_t subtreeRank; //number of bits set in the subtree rooted at this node
uint_block blocks[nb_blocks] ; // we store 2*block_size^2 bits (in theory block_size = log(n) / 2 and thus we store log^2(n) bits).
SBVNode()
: RBNode(this), myPositions(0), myRank(0), subtreePositions(0), subtreeRank(0) {
}
SBVNode(SBVNode* n)
: RBNode(n), myPositions(0), myRank(0), subtreePositions(0), subtreeRank(0) {
}
~SBVNode(){
}
SBVNode* getParent(){
return ((SBVNode*) ((RBNode*) this)->parent);
}
SBVNode* getLeft(){
return ((SBVNode*) ((RBNode*) this)->left);
}
SBVNode* getRight(){
return ((SBVNode*) ((RBNode*) this)->right);
}
void setParent(SBVNode* n){
((RBNode*) this)->parent=(RBNode*)n;
}
void setLeft(SBVNode* n){
((RBNode*) this)->left=(RBNode*)n;
}
void setRight(SBVNode* n){
((RBNode*) this)->right=(RBNode*)n;
}
};
class SBVTree : public RBTree{
public:
/* Precomputed table giving the number of ones in a block */
static byte *number_ones; // size: 1<<block_size
static uint_block * mask_positions; // size: block_size
static bool init_done;
static void init();
//Constructors
SBVTree(){
SBVTree::init();
setNil(new SBVNode());
setRoot(getNil());
tempnil = getNil();
}
//Destructor:
~SBVTree();
bool operator[](size_t);
// inserts bit at position i, countings begins with 1:
void appendBit(bool bit);
void insertBit(bool bit, size_t i);
void deleteBit(size_t i);
void setBit(size_t i);
void unsetBit(size_t i);
void changeBit(size_t i, bool bit);
bool getLastDeletedBit();
size_t getLastDeletedRank();
size_t rank0(size_t i);
size_t rank1(size_t i);
size_t rank(bool b, size_t i){return b?rank1(i):rank0(i);}
size_t select0(size_t i);
size_t select1(size_t i);
size_t select(bool b, size_t i){return b?select1(i):select0(i);}
/**
* Copy nb_bits_copied bits from *ptab_src at position start_src to the position
* 0 of *ptab_dest.
* Note that ptab_dest and ptab_src are pointers on arrays and can designate
* the same array.
*/
ulong array_copyleft(uint_block *ptab_dest, uint_block *ptab_src, ulong start_src, ulong nb_bits_copied);
/**
* Copy nb_bits_copied bits from *ptab_src at the first position to the position
* start_dest in *ptab_dest.
*/
ulong array_copyright(uint_block *ptab_dest, uint_block *ptab_src, ulong start_dest, ulong nb_bits_copied);
void setRoot(SBVNode* n){
((RBTree*) this)->root=(RBNode*)n;
}
SBVNode* getRoot(){
return ((SBVNode*) ((RBTree*) this)->root);
}
void setNil(SBVNode* n){
tempnil = n;
((RBTree*) this)->nil=(RBNode*)n;
}
SBVNode* getNil(){
return tempnil;
}
ulong getNbNodes(SBVNode *current_node) {
ulong nb=0;
if (current_node->left != getNil())
nb+=getNbNodes((SBVNode *)current_node->left);
if (current_node->right != getNil())
nb+=getNbNodes((SBVNode *)current_node->right);
return nb+1;
}
// write bits back into a stream:
void displayBits();
uint_block *getBits();
int getTreeMaxDepth();
int getTreeMinDepth();
size_t getLength();
void iterateReset();
bool iterateGetBit();
bool iterateNext();
size_t iterateGetRank(bool bit);
void checkSubTree(SBVNode *n);
void updateCounters(SBVNode *n);
void updateCountersOnPathToRoot(SBVNode *walk);
//debug:
void printNode(size_t i);
void printSubtree(SBVNode *node);
protected:
size_t iterate; /* total position in the bit vector */
size_t iterateLocal; /* position inside the block */
size_t iterateBlock; /* block number */
size_t iterateRank; /* total rank */
SBVNode *iterateNode;
bool lastDeletedBit;
size_t lastDeletedRank;
SBVNode *tempnil;
// content of BVNode, for debugging:
void printNode(SBVNode *n);
// other operations:
size_t getLocalRank(SBVNode* n, size_t position);
size_t getLocalSelect0(SBVNode* n, size_t query);
size_t getLocalSelect1(SBVNode* n, size_t query);
SBVNode *getNodeWithIthBit(size_t i, ulong *infos);
void deleteNode(SBVNode *n);
void deleteLeaf(SBVNode *leaf);
};
ostream &operator<<(ostream &os, SBVTree &bvt);
} // namespace
void output_bitarray(ostream &os, uint_block *tab, ulong nb_bits, ulong start=0);
#endif