Skip to content
This repository was archived by the owner on Feb 6, 2026. It is now read-only.

Commit 57b3990

Browse files
merge: #3382
3382: feat(si-layer-cache): adds an experimental layer cache r=adamhjk a=adamhjk This adds an experimental layer cache to the new engine, which will allow us to start rebuilding the underlying write/read path for the new engine. Co-authored-by: Adam Jacob <adam@systeminit.com>
2 parents 08b8bc3 + 36ca0df commit 57b3990

File tree

20 files changed

+2271
-33
lines changed

20 files changed

+2271
-33
lines changed

Cargo.lock

Lines changed: 431 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ members = [
4141
"lib/si-data-nats",
4242
"lib/si-data-pg",
4343
"lib/si-hash",
44+
"lib/si-layer-cache",
4445
"lib/si-pkg",
4546
"lib/si-posthog-rs",
4647
"lib/si-settings",
@@ -81,6 +82,7 @@ comfy-table = { version = "7.0.1", features = [
8182
config = { version = "0.13.4", default-features = false, features = ["toml"] }
8283
console = "0.15.7"
8384
convert_case = "0.6.0"
85+
criterion = { version = "0.3", features = [ "async_tokio" ] }
8486
crossbeam-channel = "0.5.8"
8587
deadpool = { version = "0.10.0", features = ["rt_tokio_1"] }
8688
deadpool-postgres = "0.12.1"
@@ -114,6 +116,7 @@ jwt-simple = { version = "0.12.6", default-features = false, features = [
114116
"pure-rust",
115117
] }
116118
lazy_static = "1.4.0"
119+
moka = { version = "0.12.5", features = [ "future" ] }
117120
names = { version = "0.14.0", default-features = false }
118121
nix = { version = "0.27.1", features = ["process", "signal"] }
119122
nkeys = "0.4.0"
@@ -164,6 +167,7 @@ serde_json = { version = "1.0.96", features = ["preserve_order"] }
164167
serde_url_params = "0.2.1"
165168
serde_with = "3.0.0"
166169
serde_yaml = "0.9.21"
170+
sled = "0.34.7"
167171
sodiumoxide = "0.2.7"
168172
stream-cancel = "0.8.1"
169173
strum = { version = "0.25.0", features = ["derive"] }

lib/si-layer-cache/BUCK

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# `rust_test` was used below but never loaded, which fails at BUCK evaluation
# time with an unresolved-name error; load both macros up front.
load("@prelude-si//:macros.bzl", "rust_library", "rust_test")

rust_library(
    name = "si-layer-cache",
    deps = [
        "//third-party/rust:serde",
        "//third-party/rust:thiserror",
        "//third-party/rust:moka",
        "//third-party/rust:sled",
        "//third-party/rust:lazy_static",
    ],
    srcs = glob([
        "src/**/*.rs",
    ]),
)

rust_test(
    name = "test-integration",
    deps = [
        ":si-layer-cache",
        "//third-party/rust:tokio",
        "//third-party/rust:tempfile",
        "//third-party/rust:criterion",
    ],
    srcs = glob([
        "tests/**/*.rs",
    ]),
    crate_root = "tests/integration.rs",
    env = {
        "CARGO_PKG_NAME": "integration",
    },
)

lib/si-layer-cache/Cargo.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[package]
name = "si-layer-cache"
version = "0.1.0"
edition = "2021"
rust-version = "1.64"
publish = false

[dependencies]
serde = { workspace = true }
thiserror = { workspace = true }
moka = { workspace = true }
sled = { workspace = true }
lazy_static = { workspace = true }

# Canonical Cargo key is the hyphenated `dev-dependencies`; the underscore
# form is a deprecated alias that Cargo warns about.
[dev-dependencies]
tokio = { workspace = true }
tempfile = { workspace = true }
criterion = { workspace = true }

[[bench]]
name = "insert_speed"
harness = false
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use si_layer_cache::{CacheType, LayerCache};
3+
4+
use tokio::runtime;
5+
6+
/// The 26 lowercase ASCII letters, used to generate distinct single-byte
/// cache keys (one per benchmark object).
const ASCII_LOWER: [u8; 26] = [
    b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
    b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
];

/// Size of each benchmark payload in bytes (1 MB, decimal).
const ONE_MB: usize = 1_000_000;
12+
13+
pub async fn fresh_cache_count(objects: &[Vec<u8>], count: usize) {
14+
let tempdir = tempfile::TempDir::new_in("/home/adam/benches").expect("cannotc reate tempdir");
15+
let layer_cache = LayerCache::new(tempdir).expect("cannot create layer cache");
16+
for i in 0..count {
17+
layer_cache
18+
.insert(&CacheType::Object, [ASCII_LOWER[i]], objects[i].clone())
19+
.await
20+
.expect("cannot insert into cache");
21+
}
22+
}
23+
24+
pub fn insert_speed_1_mb_object(c: &mut Criterion) {
25+
let rt = runtime::Builder::new_multi_thread()
26+
.build()
27+
.expect("cannot make tokio runtime");
28+
let mut objects: Vec<Vec<u8>> = Vec::with_capacity(ASCII_LOWER.len());
29+
for letter in ASCII_LOWER.iter() {
30+
let object = vec![*letter;ONE_MB];
31+
objects.push(object);
32+
}
33+
34+
c.bench_function("Cold Cache insert speed 1 1mb object", |b| {
35+
b.to_async(&rt)
36+
.iter(|| fresh_cache_count(black_box(&objects[..]), 1))
37+
});
38+
39+
c.bench_function("Cold Cache insert speed 26 1mb objects", |b| {
40+
b.to_async(&rt)
41+
.iter(|| fresh_cache_count(black_box(&objects[..]), ASCII_LOWER.len()))
42+
});
43+
}
44+
45+
pub fn hot_read_1_mb_object(c: &mut Criterion) {
46+
let layer_cache = LayerCache::new("/home/adam/benches/.hot_read_1_mb_object")
47+
.expect("cannot create layer cache");
48+
let rt = runtime::Builder::new_multi_thread()
49+
.build()
50+
.expect("cannot make tokio runtime");
51+
let object = vec![b'a';ONE_MB];
52+
let _r = rt.block_on(layer_cache.insert(&CacheType::Object, b"a", object));
53+
54+
c.bench_function("Hot Cache speed get one 1mb object", |b| {
55+
b.to_async(&rt)
56+
.iter(|| layer_cache.get(&CacheType::Object, [b'a']))
57+
});
58+
}
59+
60+
/// Read `key` through the layer cache, then evict it from the in-memory
/// layer so the next read must fall through to the disk layer again.
///
/// Used by the hot-disk/cold-memory benchmark: the disk cache stays warm
/// while the memory cache is kept cold between iterations.
pub async fn do_cold_memory_hot_disk(key: &[u8], layer_cache: &LayerCache) {
    let _r = layer_cache.get(&CacheType::Object, key).await;
    // Evict from the memory layer only; the on-disk entry is untouched.
    layer_cache.memory_cache.object_cache.remove(key).await;
}
64+
65+
pub fn hot_disk_cold_memory_read_1_mb_object(c: &mut Criterion) {
66+
let layer_cache = LayerCache::new("/home/adam/benches/.disk_cache_no_memory_1_mb_object")
67+
.expect("cannot create layer cache");
68+
let rt = runtime::Builder::new_multi_thread()
69+
.build()
70+
.expect("cannot make tokio runtime");
71+
let letter = b'a';
72+
let object = vec![letter;ONE_MB];
73+
let _r = rt.block_on(layer_cache.insert(&CacheType::Object, b"a", object));
74+
let key = [letter];
75+
76+
c.bench_function("Hot Disk cold Memory cache speed get one 1mb object", |b| {
77+
b.to_async(&rt)
78+
.iter(|| do_cold_memory_hot_disk(black_box(&key), black_box(&layer_cache)))
79+
});
80+
}
81+
82+
// Register the three benchmarks with criterion and generate the bench `main`
// (the Cargo.toml sets `harness = false` for this bench target).
criterion_group!(
    benches,
    insert_speed_1_mb_object,
    hot_read_1_mb_object,
    hot_disk_cold_memory_read_1_mb_object
);
criterion_main!(benches);
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use std::path::Path;
2+
3+
use sled::{self, IVec};
4+
5+
use crate::{error::LayerCacheResult, CacheType};
6+
7+
/// On-disk cache layer backed by a single sled database, with one sled
/// `Tree` (keyspace) per `CacheType`.
#[derive(Debug)]
pub struct DiskCache {
    /// Underlying sled database handle; owns the trees below.
    pub db: sled::Db,
    /// Keyspace holding `CacheType::Object` entries.
    pub object_tree: sled::Tree,
    /// Keyspace holding `CacheType::Graph` entries.
    pub graph_tree: sled::Tree,
}
13+
14+
impl DiskCache {
15+
pub fn new(path: impl AsRef<Path>) -> LayerCacheResult<DiskCache> {
16+
let db = sled::open(path)?;
17+
let object_tree = db.open_tree([CacheType::Object as u8])?;
18+
let graph_tree = db.open_tree([CacheType::Graph as u8])?;
19+
Ok(DiskCache {
20+
db,
21+
object_tree,
22+
graph_tree,
23+
})
24+
}
25+
26+
fn get_tree(&self, cache_type: &CacheType) -> &sled::Tree {
27+
match cache_type {
28+
CacheType::Graph => &self.graph_tree,
29+
CacheType::Object => &self.object_tree,
30+
}
31+
}
32+
33+
pub fn get(
34+
&self,
35+
cache_type: &CacheType,
36+
key: impl AsRef<[u8]>,
37+
) -> LayerCacheResult<Option<IVec>> {
38+
let tree = self.get_tree(cache_type);
39+
let result = tree.get(key)?;
40+
Ok(result)
41+
}
42+
43+
pub fn contains_key(
44+
&self,
45+
cache_type: &CacheType,
46+
key: impl AsRef<[u8]>,
47+
) -> LayerCacheResult<bool> {
48+
let tree = self.get_tree(cache_type);
49+
let key = key.as_ref();
50+
let result = tree.contains_key(key)?;
51+
Ok(result)
52+
}
53+
54+
pub fn insert(
55+
&self,
56+
cache_type: &CacheType,
57+
key: impl AsRef<[u8]>,
58+
value: impl Into<Vec<u8>>,
59+
) -> LayerCacheResult<()> {
60+
let tree = self.get_tree(cache_type);
61+
let key = key.as_ref();
62+
let _result = tree.insert(key, value.into())?;
63+
Ok(())
64+
}
65+
}

lib/si-layer-cache/src/error.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use thiserror::Error;
2+
3+
/// Errors produced by the layer cache.
#[derive(Error, Debug)]
pub enum LayerCacheError {
    /// A failure from the on-disk (sled) layer; the underlying error is
    /// surfaced unchanged via `#[error(transparent)]`.
    #[error(transparent)]
    SledError(#[from] sled::Error),
}

/// Convenience result alias used throughout the crate.
pub type LayerCacheResult<T> = Result<T, LayerCacheError>;

lib/si-layer-cache/src/lib.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
//! A fast in-memory, network aware, layered write-through cache for System Initiative.
2+
//!
3+
//! It should have 3 layers of caching:
4+
//!
5+
//! * Moka, an in-memory LRU style cache.
6+
//! * Sled, an on-disk memory-mapped cache, to keep more data locally than can be held in memory
7+
//! * Postgres, our final persistent storage layer.
8+
//!
9+
//! When a write is requested, the following happens:
10+
//!
11+
//! * The data is written first to a Moka cache
12+
//! * Then written to Sled for persistent storage
13+
//! * The data is then published to a nats topic layer-cache.workspaceId
14+
//! * Any remote si-layer-cache instances listen to this topic, and populate their local caches
15+
//! * Postgres gets written to eventually by a 'persister' process that writes to PG from the write
16+
//! stream
17+
//!
18+
//! When a read is requested, the following happen:
19+
//!
20+
//! * The data is read from the moka cache
21+
//! * On a miss, the data is read from sled, inserted into Moka, and returned to the user
22+
//! * On a miss, the data is read from Postgres, inserted into sled, inserted into moka, and
23+
//! returned to the user
24+
//!
25+
//! The postgres bits remain unimplemented! :)
26+
27+
pub mod disk_cache;
28+
pub mod error;
29+
pub mod memory_cache;
30+
31+
use std::fmt;
32+
use std::path::Path;
33+
34+
use memory_cache::{CacheKey, CacheValueRaw};
35+
36+
use crate::disk_cache::DiskCache;
37+
use crate::error::LayerCacheResult;
38+
use crate::memory_cache::{CacheKeyRef, CacheValue, MemoryCache};
39+
40+
/// Identifies which logical cache an entry belongs to.
///
/// The discriminant byte doubles as the sled tree name for each cache (see
/// `DiskCache::new`), so the explicit `= 1` start value is load-bearing:
/// changing it would orphan existing on-disk trees.
///
/// `Clone`/`Copy` are derived because this is a fieldless two-variant enum
/// that is passed around by reference throughout the crate.
#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, Debug)]
pub enum CacheType {
    Object = 1,
    Graph,
}
45+
46+
impl fmt::Display for CacheType {
47+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48+
match self {
49+
CacheType::Object => write!(f, "object"),
50+
CacheType::Graph => write!(f, "graph"),
51+
}
52+
}
53+
}
54+
55+
/// The layered write-through cache: a fast in-memory (moka) layer in front
/// of a persistent on-disk (sled) layer.
///
/// Reads consult memory first and fall back to disk, re-populating memory on
/// a disk hit; writes go to whichever layers are missing the key. The
/// Postgres/NATS layers described in the crate docs are not implemented here.
pub struct LayerCache {
    /// In-memory (moka) layer; consulted first on reads.
    pub memory_cache: MemoryCache,
    /// On-disk (sled) layer; consulted on memory misses.
    pub disk_cache: DiskCache,
}
59+
60+
impl LayerCache {
61+
pub fn new(path: impl AsRef<Path>) -> LayerCacheResult<LayerCache> {
62+
let memory_cache = MemoryCache::new();
63+
let disk_cache = DiskCache::new(path)?;
64+
Ok(LayerCache {
65+
memory_cache,
66+
disk_cache,
67+
})
68+
}
69+
70+
#[inline]
71+
pub async fn get(
72+
&self,
73+
cache_type: &CacheType,
74+
key: impl AsRef<CacheKeyRef>,
75+
) -> LayerCacheResult<Option<CacheValue>> {
76+
let key = key.as_ref();
77+
let memory_value = self.memory_cache.get(cache_type, key).await;
78+
if memory_value.is_some() {
79+
Ok(memory_value)
80+
} else {
81+
let maybe_value = self.disk_cache.get(cache_type, key)?;
82+
match maybe_value {
83+
Some(value) => {
84+
let d: Vec<u8> = value.as_ref().into();
85+
self.memory_cache
86+
.insert(cache_type, Vec::from(key), d)
87+
.await;
88+
Ok(self.memory_cache.get(cache_type, key).await)
89+
}
90+
None => Ok(None),
91+
}
92+
}
93+
}
94+
95+
#[inline]
96+
pub async fn insert(
97+
&self,
98+
cache_type: &CacheType,
99+
key: impl Into<CacheKey>,
100+
value: impl Into<CacheValueRaw>,
101+
) -> LayerCacheResult<()> {
102+
let key = key.into();
103+
let in_memory = self.memory_cache.contains_key(cache_type, &key);
104+
let on_disk = self.disk_cache.contains_key(cache_type, &key)?;
105+
106+
match (in_memory, on_disk) {
107+
// In memory and on disk
108+
(true, true) => Ok(()),
109+
// Neither on memory or on disk
110+
(false, false) => {
111+
let value = value.into();
112+
self.memory_cache
113+
.insert(cache_type, key.clone(), value.clone())
114+
.await;
115+
self.disk_cache.insert(cache_type, key, value)?;
116+
Ok(())
117+
}
118+
// Not in memory, but on disk - we can write, becasue objects are immutable
119+
(false, true) => {
120+
let value = value.into();
121+
self.memory_cache
122+
.insert(cache_type, key.clone(), value)
123+
.await;
124+
Ok(())
125+
}
126+
// In memory, but not on disk
127+
(true, false) => {
128+
let value = value.into();
129+
self.disk_cache.insert(cache_type, key, value)?;
130+
Ok(())
131+
}
132+
}
133+
}
134+
}

0 commit comments

Comments
 (0)