Skip to content

datastore: fix out-of-bounds index handling in snapshots BTree #2883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 30, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 35 additions & 20 deletions silkworm/db/datastore/snapshots/btree/btree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,16 @@ BTree::BTree(

using CompareResult = std::pair<int, BytesOrByteView>;

static CompareResult compare_key(
static std::optional<CompareResult> compare_key(
ByteView key,
BTree::DataIndex key_index,
const BTree::KeyValueIndex& index) {
auto data_key = index.lookup_key(key_index);
ensure(data_key.has_value(), [&] { return "out-of-bounds key=" + to_hex(key) + " data_index=" + std::to_string(key_index); });
if (!data_key) {
return std::nullopt;
}
int cmp = ByteView{*data_key}.compare(key);
return {cmp, std::move(*data_key)};
}

static BTree::KeyValueIndex::LookupResult lookup_key_value(
ByteView key,
BTree::DataIndex key_index,
const BTree::KeyValueIndex& index) {
auto result = index.lookup_key_value(key_index, key);
ensure(result.has_value(), [&] { return "out-of-bounds key=" + to_hex(key) + " data_index=" + std::to_string(key_index); });
return std::move(*result);
return CompareResult{cmp, std::move(*data_key)};
}

BTree::SeekResult BTree::seek(ByteView seek_key, const KeyValueIndex& index) {
Expand All @@ -55,8 +48,12 @@ BTree::SeekResult BTree::seek(ByteView seek_key, const KeyValueIndex& index) {
auto [_, left_index, right_index] = binary_search_in_cache(seek_key); // left_index == right_index when key is found
uint64_t median = 0;
while (left_index < right_index) {
if (right_index - left_index <= kDefaultBtreeStartSkip) { // found small range, faster to scan now
const auto [cmp, key] = compare_key(seek_key, left_index, index);
if (right_index - left_index <= kDefaultBtreeStartSkip) { // found a small range, faster to scan now
const auto cmp_result = compare_key(seek_key, left_index, index);
if (!cmp_result) {
return {/*found=*/false, {}, {}, 0};
}
const auto [cmp, key] = *cmp_result;
if (cmp == 0) {
right_index = left_index;
break;
Expand All @@ -71,7 +68,11 @@ BTree::SeekResult BTree::seek(ByteView seek_key, const KeyValueIndex& index) {
break;
}
median = (left_index + right_index) >> 1;
const auto [cmp, key] = compare_key(seek_key, median, index);
const auto cmp_result = compare_key(seek_key, median, index);
if (!cmp_result) {
return {/*found=*/false, {}, {}, 0};
}
const auto [cmp, key] = *cmp_result;
if (cmp == 0) {
left_index = right_index = median;
break;
Expand Down Expand Up @@ -107,7 +108,7 @@ std::optional<BytesOrByteView> BTree::get(ByteView key, const KeyValueIndex& ind
}
auto [_, left_index, right_index] = binary_search_in_cache(key); // left_index == right_index when key is found
while (left_index < right_index) {
if (right_index - left_index <= kDefaultBtreeStartSkip) { // found small range, faster to scan now
if (right_index - left_index <= kDefaultBtreeStartSkip) { // found a small range, faster to scan now
auto value = index.advance_key_value(left_index, key, right_index - left_index);
if (!value) {
left_index = right_index;
Expand All @@ -116,7 +117,11 @@ std::optional<BytesOrByteView> BTree::get(ByteView key, const KeyValueIndex& ind
return value;
}
const uint64_t median = (left_index + right_index) >> 1;
auto [cmp, optional_v] = lookup_key_value(key, median, index);
const auto lookup_result = index.lookup_key_value(median, key);
if (!lookup_result) {
return std::nullopt;
}
const auto [cmp, optional_v] = *lookup_result;
if (cmp == 0) {
SILKWORM_ASSERT(optional_v);
return optional_v;
Expand All @@ -127,7 +132,11 @@ std::optional<BytesOrByteView> BTree::get(ByteView key, const KeyValueIndex& ind
left_index = median + 1;
}
}
auto [cmp, optional_v] = lookup_key_value(key, left_index, index);
const auto lookup_result = index.lookup_key_value(left_index, key);
if (!lookup_result) {
return std::nullopt;
}
const auto [cmp, optional_v] = *lookup_result;
if (cmp != 0) {
return std::nullopt;
}
Expand Down Expand Up @@ -157,7 +166,9 @@ void BTree::warmup(const KeyValueIndex& index) {
const size_t step = num_nodes_ < fanout_ ? 1 : fanout_; // cache all keys if less than M
for (size_t i{step}; i < num_nodes_; i += step) {
const size_t data_index = i - 1;
auto [_, key] = compare_key({}, data_index, index);
auto cmp_result = compare_key({}, data_index, index);
if (!cmp_result) continue;
auto [_, key] = *cmp_result;
cache_.emplace_back(Node{data_index, Bytes{key}});
cached_bytes += sizeof(Node) + ByteView{key}.size();
}
Expand Down Expand Up @@ -185,7 +196,11 @@ BTree::Nodes BTree::decode_nodes(std::span<uint8_t> encoded_nodes) {

void BTree::check_against_data_keys(const KeyValueIndex& index) {
for (const auto& node : cache_) {
const auto [cmp, key] = compare_key(node.key, node.key_index, index);
const auto cmp_result = compare_key(node.key, node.key_index, index);
ensure(cmp_result.has_value(), [&] {
return "out-of-bounds key=" + to_hex(node.key) + " data_index=" + std::to_string(node.key_index);
});
const auto [cmp, key] = *cmp_result;
ensure(cmp == 0, [&]() {
return "key mismatch node.key=" + to_hex(node.key) +
" key=" + to_hex(key) +
Expand Down
Loading