Skip to content

Commit 87d512f

Browse files
committed
datastore: .ef v2 format
1 parent c12a16f commit 87d512f

24 files changed

+373
-137
lines changed

silkworm/db/blocks/headers/header_segment.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,11 @@ void decode_word_into_header(ByteView word, BlockHeader& header) {
2828
success_or_throw(decode_result, "decode_word_into_header: rlp::decode error");
2929
}
3030

31-
void check_sanity_of_header_with_metadata(const BlockHeader& header, datastore::StepRange step_range) {
32-
auto block_num_range = db::blocks::kStepToBlockNumConverter.timestamp_range_from_step_range(step_range);
31+
void check_sanity_of_header_with_metadata(
32+
const BlockHeader& header,
33+
datastore::StepRange step_range,
34+
const datastore::StepToTimestampConverter& step_converter) {
35+
auto block_num_range = step_converter.timestamp_range_from_step_range(step_range);
3336
BlockNum block_from = block_num_range.start;
3437
BlockNum block_to = block_num_range.end;
3538
ensure((header.number >= block_from) && (header.number < block_to), [&]() {

silkworm/db/blocks/headers/header_segment.hpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,10 @@ namespace silkworm::snapshots {
1313

1414
void encode_word_from_header(Bytes& word, const BlockHeader& header);
1515
void decode_word_into_header(ByteView word, BlockHeader& header);
16-
void check_sanity_of_header_with_metadata(const BlockHeader& header, datastore::StepRange step_range);
16+
void check_sanity_of_header_with_metadata(
17+
const BlockHeader& header,
18+
datastore::StepRange step_range,
19+
const datastore::StepToTimestampConverter& step_converter);
1720

1821
struct HeaderSegmentWordEncoder : public Encoder {
1922
BlockHeader value;
@@ -39,8 +42,8 @@ struct HeaderSegmentWordDecoder : public Decoder {
3942
decode_word_into_header(word, value);
4043
}
4144

42-
void check_sanity_with_metadata(const SnapshotPath& path) override {
43-
check_sanity_of_header_with_metadata(value, path.step_range());
45+
void check_sanity_with_metadata(const SnapshotPath& path, const datastore::StepToTimestampConverter& step_converter) override {
46+
check_sanity_of_header_with_metadata(value, path.step_range(), step_converter);
4447
}
4548
};
4649

silkworm/db/datastore/snapshots/btree/btree_index_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ TEST_CASE("BTreeIndex", "[snapshots][btree]") {
101101
const auto [kv_file_path, bt_file_path] = sample_3_keys_kv_and_bt_files(tmp_dir);
102102

103103
// Open the KV and BT index files
104-
segment::KVSegmentFileReader kv_segment{kv_file_path, seg::CompressionKind::kNone};
104+
segment::KVSegmentFileReader kv_segment{kv_file_path, {}, seg::CompressionKind::kNone};
105105
BTreeIndex bt_index{bt_file_path};
106106
bt_index.warmup_if_empty_or_check(kv_segment);
107107
REQUIRE(bt_index.key_count() == 3);

silkworm/db/datastore/snapshots/common/codec.hpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@
66
#include <silkworm/core/common/base.hpp>
77
#include <silkworm/core/common/bytes.hpp>
88

9+
namespace silkworm::datastore {
10+
struct StepToTimestampConverter;
11+
} // namespace silkworm::datastore
12+
913
namespace silkworm::snapshots {
1014

1115
class SnapshotPath;
@@ -24,7 +28,8 @@ struct Decoder {
2428
virtual ~Decoder() = default;
2529
using Word = BytesOrByteView;
2630
virtual void decode_word(Word& word) = 0; // this allows word to be moved after decoding
27-
virtual void check_sanity_with_metadata(const SnapshotPath& /*path*/) {}
31+
virtual void decode_word_with_metadata(const SnapshotPath& /*path*/, const datastore::StepToTimestampConverter& /*step_converter*/) {}
32+
virtual void check_sanity_with_metadata(const SnapshotPath& /*path*/, const datastore::StepToTimestampConverter& /*step_converter*/) {}
2833
};
2934

3035
template <class TDecoder>

silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder.hpp

Lines changed: 0 additions & 27 deletions
This file was deleted.

silkworm/db/datastore/snapshots/elias_fano/elias_fano_decoder_test.cpp

Lines changed: 0 additions & 30 deletions
This file was deleted.

silkworm/db/datastore/snapshots/elias_fano/elias_fano_list.cpp

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -45,29 +45,38 @@
4545

4646
namespace silkworm::snapshots::elias_fano {
4747

48-
EliasFanoList32 EliasFanoList32::from_encoded_data(std::span<const uint8_t> encoded_data) {
49-
ensure(encoded_data.size() >= kCountLength + kULength, "EliasFanoList32::from_encoded_data data too short");
48+
EliasFanoList32 EliasFanoList32::from_encoded_data(BytesOrByteView encoded_data_holder) {
49+
std::span<const uint8_t> encoded_data{ByteView{encoded_data_holder}};
50+
const uint64_t data_offset = kCountLength + kULength;
51+
ensure(encoded_data.size() >= data_offset, "EliasFanoList32::from_encoded_data data too short");
5052
const uint64_t last = endian::load_big_u64(encoded_data.data());
5153
const uint64_t u = endian::load_big_u64(encoded_data.subspan(kCountLength).data());
52-
const auto remaining_data = encoded_data.subspan(kCountLength + kULength);
53-
return EliasFanoList32{last + 1, u - 1, remaining_data};
54+
return EliasFanoList32{last + 1, u - 1, data_offset, std::move(encoded_data_holder)};
55+
}
56+
57+
EliasFanoList32 EliasFanoList32::from_encoded_data(std::span<const uint8_t> encoded_data) {
58+
return from_encoded_data(BytesOrByteView{ByteView{encoded_data}});
5459
}
5560

5661
EliasFanoList32 EliasFanoList32::from_encoded_data(ByteView encoded_data) {
57-
return from_encoded_data(std::span<const uint8_t>{encoded_data});
62+
return from_encoded_data(BytesOrByteView{encoded_data});
5863
}
5964

6065
EliasFanoList32 EliasFanoList32::from_encoded_data(Bytes encoded_data) {
61-
auto elias_fano_list = from_encoded_data(std::span<const uint8_t>{encoded_data});
62-
elias_fano_list.data_holder_ = std::move(encoded_data);
63-
return elias_fano_list;
66+
return from_encoded_data(BytesOrByteView{std::move(encoded_data)});
6467
}
6568

66-
EliasFanoList32::EliasFanoList32(uint64_t count, uint64_t max_value, std::span<const uint8_t> encoded_data)
69+
EliasFanoList32::EliasFanoList32(
70+
uint64_t count,
71+
uint64_t max_value,
72+
uint64_t data_offset,
73+
BytesOrByteView data_holder)
6774
: count_{count},
6875
u_{max_value + 1},
69-
data_{reinterpret_cast<const uint64_t*>(encoded_data.data()), EliasFanoList32::total_words(count, max_value)} {
70-
SILKWORM_ASSERT(EliasFanoList32::total_words(count, max_value) * sizeof(uint64_t) <= encoded_data.size());
76+
data_offset_{data_offset},
77+
total_words_{EliasFanoList32::total_words(count, max_value)},
78+
data_holder_{std::move(data_holder)} {
79+
SILKWORM_ASSERT(EliasFanoList32::total_words(count, max_value) * sizeof(uint64_t) + data_offset <= ByteView{data_holder_}.size());
7180
derive_fields();
7281
}
7382

@@ -164,7 +173,8 @@ std::ostream& operator<<(std::ostream& os, const EliasFanoList32& ef) {
164173
os.write(reinterpret_cast<const char*>(uint64_buffer.data()), sizeof(uint64_t));
165174
SILK_DEBUG << "[index] written EF upper: " << ef.u_;
166175

167-
os.write(reinterpret_cast<const char*>(ef.data_.data()), static_cast<std::streamsize>(ef.data_.size() * sizeof(uint64_t)));
176+
auto data = ef.data();
177+
os.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(data.size() * sizeof(uint64_t)));
168178
return os;
169179
}
170180

@@ -197,9 +207,11 @@ uint64_t EliasFanoList32::derive_fields() {
197207
uint64_t words_upper_bits = (count_ + (u_ >> l_) + 63) / 64;
198208
uint64_t jump_words = jump_size_words(count_);
199209
uint64_t total_words = words_lower_bits + words_upper_bits + jump_words;
200-
lower_bits_ = data_.subspan(0, words_lower_bits);
201-
upper_bits_ = data_.subspan(words_lower_bits, words_upper_bits);
202-
jump_ = data_.subspan(words_lower_bits + words_upper_bits, jump_words);
210+
211+
auto data = this->data();
212+
lower_bits_ = data.subspan(0, words_lower_bits);
213+
upper_bits_ = data.subspan(words_lower_bits, words_upper_bits);
214+
jump_ = data.subspan(words_lower_bits + words_upper_bits, jump_words);
203215

204216
return total_words;
205217
}

silkworm/db/datastore/snapshots/elias_fano/elias_fano_list.hpp

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@
3737
#include <span>
3838
#include <utility>
3939

40+
#include <silkworm/core/common/bytes.hpp>
41+
4042
#include "../common/encoding/sequence.hpp"
4143
#include "../common/util/iterator/list_iterator.hpp"
4244

@@ -47,6 +49,9 @@ class EliasFanoList32 {
4749
public:
4850
using value_type = uint64_t;
4951

52+
//! Create a new 32-bit EF list from the given encoded data (i.e. data plus data header)
53+
static EliasFanoList32 from_encoded_data(BytesOrByteView encoded_data_holder);
54+
5055
//! Create a new 32-bit EF list from the given encoded data (i.e. data plus data header)
5156
static EliasFanoList32 from_encoded_data(std::span<const uint8_t> encoded_data);
5257

@@ -60,18 +65,26 @@ class EliasFanoList32 {
6065
//! Create a new 32-bit EF list from an existing data sequence
6166
//! \param count
6267
//! \param max_value
63-
//! \param encoded_data the existing data sequence (portion exceeding the total words will be ignored)
64-
EliasFanoList32(uint64_t count, uint64_t max_value, std::span<const uint8_t> encoded_data);
68+
//! \param data_offset offset in the data_holder to the data sequence
69+
//! \param data_holder the data (portion exceeding the total words will be ignored)
70+
EliasFanoList32(
71+
uint64_t count,
72+
uint64_t max_value,
73+
uint64_t data_offset,
74+
BytesOrByteView data_holder);
6575

6676
size_t size() const { return count_; }
6777

6878
uint64_t max() const { return u_ - 1; }
6979

7080
uint64_t min() const { return at(0); }
7181

72-
std::span<const uint64_t> data() const { return data_; }
82+
std::span<const uint64_t> data() const {
83+
const uint64_t* data = reinterpret_cast<const uint64_t*>(ByteView{data_holder_}.data() + data_offset_);
84+
return {data, total_words_};
85+
}
7386

74-
size_t encoded_data_size() const { return kCountLength + kULength + data_.size() * sizeof(uint64_t); }
87+
size_t encoded_data_size() const { return kCountLength + kULength + total_words_ * sizeof(uint64_t); }
7588

7689
uint64_t at(size_t i) const;
7790
uint64_t operator[](size_t i) const { return at(i); }
@@ -84,23 +97,17 @@ class EliasFanoList32 {
8497
friend std::ostream& operator<<(std::ostream& os, const EliasFanoList32& ef);
8598

8699
bool operator==(const EliasFanoList32& other) const {
87-
return (count_ == other.count_) && (u_ == other.u_) && std::ranges::equal(data_, other.data_);
100+
return (count_ == other.count_) && (u_ == other.u_) && std::ranges::equal(data(), other.data());
88101
}
89102

90103
static uint64_t total_words(uint64_t count, uint64_t max_value);
91104
static uint64_t jump_size_words(uint64_t count);
92105

93-
static EliasFanoList32 empty_list() {
94-
return EliasFanoList32{};
95-
}
96-
97106
using Iterator = ListIterator<EliasFanoList32, value_type>;
98107
Iterator begin() const { return Iterator{*this, 0}; }
99108
Iterator end() const { return Iterator{*this, size()}; }
100109

101110
private:
102-
EliasFanoList32() = default;
103-
104111
uint64_t upper(uint64_t c) const;
105112
uint64_t derive_fields();
106113

@@ -115,10 +122,10 @@ class EliasFanoList32 {
115122
//! The strict upper bound on the EF data points, i.e. max + 1
116123
uint64_t u_{0};
117124
uint64_t l_{0};
118-
//! Lightweight view over the EF encoded data sequence.
119-
std::span<const uint64_t> data_;
120-
//! Copy of the EF encoded data sequence when it must be kept for lifetime reasons
121-
std::optional<Bytes> data_holder_{};
125+
uint64_t data_offset_{0};
126+
uint64_t total_words_{0};
127+
//! The EF encoded data sequence
128+
BytesOrByteView data_holder_{};
122129
};
123130

124131
//! 32-bit Elias-Fano (EF) list writer that can be used to encode one monotone non-decreasing sequence
@@ -139,9 +146,12 @@ class EliasFanoList32Builder {
139146

140147
EliasFanoList32 as_view() const {
141148
const auto max_value = u_ - 1;
142-
return EliasFanoList32{count_,
143-
max_value,
144-
{reinterpret_cast<const uint8_t*>(data_.data()), EliasFanoList32::total_words(count_, max_value) * sizeof(uint64_t)}};
149+
return EliasFanoList32{
150+
count_,
151+
max_value,
152+
0,
153+
BytesOrByteView{ByteView{reinterpret_cast<const uint8_t*>(data_.data()), EliasFanoList32::total_words(count_, max_value) * sizeof(uint64_t)}},
154+
};
145155
};
146156

147157
size_t size() const { return count_; }

silkworm/db/datastore/snapshots/elias_fano/elias_fano_list_test.cpp

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
#include <catch2/catch_test_macros.hpp>
1212

13+
#include <silkworm/core/common/bytes_to_string.hpp>
1314
#include <silkworm/core/common/endian.hpp>
1415
#include <silkworm/core/common/util.hpp>
1516

@@ -114,9 +115,7 @@ TEST_CASE("EliasFanoList32", "[silkworm][recsplit][elias_fano]") {
114115
CHECK(to_hex(ef_bytes) == to_expected_hex(ef_test.offsets.size() - 1, ef_test.expected_u, ef_test.expected_data));
115116

116117
// Decode monotone ascending integer sequence from Elias-Fano representation and compare with original
117-
constexpr size_t kParamsSize{2 * sizeof(uint64_t)}; // count + u length in bytes
118-
std::span<uint8_t> data{ef_bytes.data() + kParamsSize, ef_bytes.size() - kParamsSize};
119-
EliasFanoList32 ef_list_copy{ef_test.offsets.size(), ef_test.expected_u - 1, data};
118+
EliasFanoList32 ef_list_copy = EliasFanoList32::from_encoded_data(ef_bytes);
120119
for (uint64_t i{0}; i < ef_test.offsets.size(); ++i) {
121120
const uint64_t x = ef_list_copy.at(i);
122121
CHECK(x == ef_test.offsets[i]);
@@ -152,4 +151,17 @@ TEST_CASE("EliasFanoList32::seek", "[silkworm][recsplit][elias_fano]") {
152151
CHECK(ef_list.seek(70, true) == SeekResult{offsets.size() - 1, 62});
153152
}
154153

154+
TEST_CASE("EliasFanoList32::from_encoded_data") {
155+
EliasFanoList32Builder expected_list{3, 3};
156+
expected_list.add_offset(1);
157+
expected_list.add_offset(2);
158+
expected_list.add_offset(3);
159+
expected_list.build();
160+
std::stringstream expected_list_stream;
161+
expected_list_stream << expected_list;
162+
const Bytes expected_list_bytes = string_to_bytes(expected_list_stream.str());
163+
164+
CHECK(EliasFanoList32::from_encoded_data(expected_list_bytes) == expected_list.as_view());
165+
}
166+
155167
} // namespace silkworm::snapshots::elias_fano

silkworm/db/datastore/snapshots/history_range_by_keys_query.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ struct HistoryRangeByKeysSegmentQuery {
4040
datastore::Timestamp timestamp,
4141
bool ascending,
4242
Bytes key_data,
43-
const elias_fano::EliasFanoList32& key_timestamps) const {
43+
const InvertedIndexTimestampList& key_timestamps) const {
4444
SILKWORM_ASSERT(ascending); // descending is not implemented
4545

4646
// find the first key timestamp within the ts_range
@@ -66,7 +66,7 @@ struct HistoryRangeByKeysSegmentQuery {
6666
auto ii_reader = entity_.inverted_index.kv_segment_reader<RawDecoder<Bytes>>();
6767
auto begin_it = offset ? ii_reader.seek(*offset) : ii_reader.end();
6868

69-
auto lookup_kv_pair_func = [query = *this, timestamp, ascending](std::pair<Bytes&, elias_fano::EliasFanoList32&>&& ii_entry) {
69+
auto lookup_kv_pair_func = [query = *this, timestamp, ascending](std::pair<Bytes&, InvertedIndexTimestampList&>&& ii_entry) {
7070
return query.lookup_kv_pair(timestamp, ascending, std::move(ii_entry.first), ii_entry.second);
7171
};
7272

0 commit comments

Comments
 (0)