Skip to content

Commit 03b43ca

Browse files
committed
Support VariableList longer than 2**31 on 32-bit architectures
* Improves handling of large VariableLists on 32-bit architectures: crash loudly instead of silently overflowing (and producing wrong results) * Adds a feature that caps the typenum-to-usize conversion at usize::MAX * Adds tests and GitHub Actions jobs
1 parent 4fef53f commit 03b43ca

File tree

6 files changed

+378
-37
lines changed

6 files changed

+378
-37
lines changed

.github/workflows/test-suite.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,28 @@ jobs:
3131
run: rustup update stable
3232
- name: Run tests
3333
run: cargo test --release
34+
cross-test-i686:
35+
name: cross test i686-unknown-linux-gnu
36+
runs-on: ubuntu-latest
37+
steps:
38+
- uses: actions/checkout@v3
39+
- name: Install cross
40+
run: cargo install cross --git https://github.com/cross-rs/cross
41+
- name: Add i686-unknown-linux-gnu target
42+
run: rustup target add i686-unknown-linux-gnu
43+
- name: Run cross test for i686-unknown-linux-gnu
44+
run: cross test --target i686-unknown-linux-gnu
45+
cross-test-i686-overflow:
46+
name: cross test i686-unknown-linux-gnu (typenum overflow feature)
47+
runs-on: ubuntu-latest
48+
steps:
49+
- uses: actions/checkout@v3
50+
- name: Install cross
51+
run: cargo install cross --git https://github.com/cross-rs/cross
52+
- name: Add i686-unknown-linux-gnu target
53+
run: rustup target add i686-unknown-linux-gnu
54+
- name: Run cross test for i686-unknown-linux-gnu with cap-typenum-to-usize-overflow
55+
run: cross test --target i686-unknown-linux-gnu --features cap-typenum-to-usize-overflow
3456
coverage:
3557
name: cargo-tarpaulin
3658
runs-on: ubuntu-latest

Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,16 @@ typenum = "1.12.0"
2020
smallvec = "1.8.0"
2121
arbitrary = { version = "1.0", features = ["derive"], optional = true }
2222
itertools = "0.13.0"
23+
ethereum_hashing = {version = "0.7.0", optional = true}
2324

2425
[dev-dependencies]
2526
serde_json = "1.0.0"
2627
tree_hash_derive = "0.10.0"
28+
ethereum_hashing = {version = "0.7.0"}
29+
30+
[target.i686-unknown-linux-gnu]
31+
rustflags = ["-C", "target-feature=+sse2"]
32+
33+
[features]
34+
# Use with great care - see the comment in src/typenum_helpers.rs
35+
cap-typenum-to-usize-overflow=["dep:ethereum_hashing"]

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ mod fixed_vector;
4242
pub mod serde_utils;
4343
mod tree_hash;
4444
mod variable_list;
45+
mod typenum_helpers;
4546

4647
pub use fixed_vector::FixedVector;
4748
pub use ssz::{BitList, BitVector, Bitfield};

src/tree_hash.rs

Lines changed: 81 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,96 @@
11
use tree_hash::{Hash256, MerkleHasher, TreeHash, TreeHashType};
22
use typenum::Unsigned;
3+
use crate::typenum_helpers::to_usize;
34

4-
/// A helper function providing common functionality between the `TreeHash` implementations for
5-
/// `FixedVector` and `VariableList`.
6-
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
7-
where
8-
T: TreeHash,
9-
N: Unsigned,
10-
{
5+
pub fn packing_factor<T: TreeHash>() -> usize {
116
match T::tree_hash_type() {
12-
TreeHashType::Basic => {
13-
let mut hasher = MerkleHasher::with_leaves(
14-
(N::to_usize() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(),
15-
);
7+
TreeHashType::Basic => T::tree_hash_packing_factor(),
8+
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => 1,
9+
}
10+
}
11+
12+
mod default_impl {
13+
use super::*;
14+
/// A helper function providing common functionality between the `TreeHash` implementations for
15+
/// `FixedVector` and `VariableList`.
16+
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
17+
where
18+
T: TreeHash,
19+
N: Unsigned,
20+
{
21+
match T::tree_hash_type() {
22+
TreeHashType::Basic => {
23+
let mut hasher = MerkleHasher::with_leaves(
24+
(to_usize::<N>() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(),
25+
);
26+
27+
for item in vec {
28+
hasher
29+
.write(&item.tree_hash_packed_encoding())
30+
.expect("ssz_types variable vec should not contain more elements than max");
31+
}
1632

17-
for item in vec {
1833
hasher
19-
.write(&item.tree_hash_packed_encoding())
20-
.expect("ssz_types variable vec should not contain more elements than max");
34+
.finish()
35+
.expect("ssz_types variable vec should not have a remaining buffer")
2136
}
37+
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
38+
let mut hasher = MerkleHasher::with_leaves(N::to_usize());
2239

23-
hasher
24-
.finish()
25-
.expect("ssz_types variable vec should not have a remaining buffer")
26-
}
27-
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
28-
let mut hasher = MerkleHasher::with_leaves(N::to_usize());
40+
for item in vec {
41+
hasher
42+
.write(item.tree_hash_root().as_slice())
43+
.expect("ssz_types vec should not contain more elements than max");
44+
}
2945

30-
for item in vec {
3146
hasher
32-
.write(item.tree_hash_root().as_slice())
33-
.expect("ssz_types vec should not contain more elements than max");
47+
.finish()
48+
.expect("ssz_types vec should not have a remaining buffer")
3449
}
50+
}
51+
}
52+
}
53+
54+
#[cfg(feature = "cap-typenum-to-usize-overflow")]
mod arch_32x_workaround {
    use super::*;
    use ethereum_hashing::{hash32_concat, ZERO_HASHES};
    use tree_hash::{Hash256, TreeHash};
    use typenum::Unsigned;

    /// Largest capacity (2**29) that is hashed directly by `default_impl`; larger
    /// capacities are hashed at this size and then padded up to the real depth.
    type MaxDepth = typenum::U536870912;

    /// Extends a root computed for a tree of `current_depth` to `target_depth` by
    /// repeatedly hashing it together with the zero hash of the matching depth.
    fn pad_to_depth(hash: Hash256, target_depth: usize, current_depth: usize) -> Hash256 {
        (current_depth..target_depth)
            .fold(hash.0, |node, depth| {
                hash32_concat(&node, ZERO_HASHES[depth].as_slice())
            })
            .into()
    }

    /// Depth of the Merkle tree for capacity `N` of `T`: log2 of the capacity
    /// (rounded up to a power of two) minus log2 of the packing factor.
    fn target_tree_depth<T: TreeHash, N: Unsigned>() -> usize {
        let capacity_log2 = N::to_u64().next_power_of_two().ilog2() as usize;
        let packing_log2 = packing_factor::<T>().next_power_of_two().ilog2() as usize;
        capacity_log2 - packing_log2
    }

    /// Variant of `default_impl::vec_tree_hash_root` for capacities above `MaxDepth`.
    ///
    /// Capacities up to `MaxDepth` are delegated unchanged. Larger ones are hashed
    /// as if the capacity were `MaxDepth`, and the resulting root is then padded
    /// with zero hashes up to the depth that `N` requires.
    pub fn vec_tree_hash_root<T: TreeHash, N: Unsigned>(vec: &[T]) -> Hash256 {
        if N::to_u64() > MaxDepth::to_u64() {
            let capped_root = default_impl::vec_tree_hash_root::<T, MaxDepth>(vec);

            let full_depth = target_tree_depth::<T, N>();
            let capped_depth = target_tree_depth::<T, MaxDepth>();

            pad_to_depth(capped_root, full_depth, capped_depth)
        } else {
            default_impl::vec_tree_hash_root::<T, N>(vec)
        }
    }
}
91+
92+
#[cfg(any(target_pointer_width = "64", not(feature="cap-typenum-to-usize-overflow")))]
93+
pub use default_impl::vec_tree_hash_root;
94+
95+
#[cfg(all(not(target_pointer_width = "64"), feature="cap-typenum-to-usize-overflow"))]
96+
pub use arch_32x_workaround::vec_tree_hash_root;

src/typenum_helpers.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
use typenum::Unsigned;
2+
3+
// On x64, all typenums always fit usize
4+
#[cfg(target_pointer_width = "64")]
5+
pub fn to_usize<N:Unsigned>() -> usize {
6+
N::to_usize()
7+
}
8+
9+
10+
/// Converts the type-level number `N` to `usize` on targets whose pointer width
/// is not 64 bits, where `N` may be too large to fit.
///
/// If `N` fits in `usize` it is returned unchanged. Otherwise the behaviour
/// depends on the `cap-typenum-to-usize-overflow` feature:
///
/// * feature disabled: panics with a message naming the typenum and `usize::MAX`;
/// * feature enabled: returns `usize::MAX`.
///
/// Capping allows working with `VariableList`s that are "virtually larger" than
/// `usize`, provided the actual number of elements does not exceed `usize`. One
/// example is the Ethereum BeaconChain `validators` field, a
/// `VariableList<..., 2**40>` whose actual length is far below 2**32.
///
/// This still seems sound: if the number of elements really surpassed
/// `usize::MAX`, the machine would OOM/segfault/otherwise crash violently, which
/// is nearly equivalent to a panic. It is nevertheless a double-edged sword -
/// only enable the feature if you can guarantee that no `VariableList` in your
/// program will ever hold more than `usize::MAX` elements on the architecture
/// with the smallest `usize` it will ever run on.
///
/// # Panics
///
/// Panics when `N` does not fit in `usize` and the feature is disabled.
#[cfg(not(target_pointer_width = "64"))]
pub fn to_usize<N: Unsigned>() -> usize {
    let wide = N::to_u64();

    // `usize` is narrower than `u64` on these targets, so widening `usize::MAX`
    // is lossless and the comparison tells us exactly whether `N` fits.
    if wide <= usize::MAX as u64 {
        return wide as usize;
    }

    if cfg!(feature = "cap-typenum-to-usize-overflow") {
        // Option 2: cap at the largest representable value.
        usize::MAX
    } else {
        // Option 1: fail loudly with as informative a message as possible.
        panic!("Overflow converting typenum U{} to usize (usize::MAX={})", wide, usize::MAX)
    }
}

0 commit comments

Comments
 (0)