Skip to content

Commit 4e27712

Browse files
committed
feat: using sqlite as index cache backend
1 parent b1684e2 commit 4e27712

File tree

4 files changed

+116
-86
lines changed

4 files changed

+116
-86
lines changed

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ clap = "2.31.2"
6767
unicode-width = "0.1.5"
6868
openssl = { version = '0.10.11', optional = true }
6969
im-rc = "15.0.0"
70+
rusqlite = { version = "0.25.3", features = ["bundled"] }
71+
72+
once_cell = "1.7.2"
7073

7174
# A noop dependency that changes in the Rust repository, it's a bit of a hack.
7275
# See the `src/tools/rustc-workspace-hack/README.md` file in `rust-lang/rust`

src/cargo/sources/registry/db.rs

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
use crate::CargoResult;
2+
use once_cell::sync::OnceCell;
3+
use rusqlite::{params, Connection};
4+
use std::path::Path;
5+
use std::sync::Mutex;
6+
7+
pub(crate) struct Db(Connection);
8+
9+
const TABLE_SUMMARIES: &'static str = "\
10+
CREATE TABLE IF NOT EXISTS summaries (
11+
name TEXT PRIMARY KEY NOT NULL,
12+
contents BLOB NOT NULL
13+
)";
14+
15+
const INSERT_SUMMERIES: &'static str = "\
16+
INSERT OR REPLACE INTO summaries (name, contents) VALUES (?, ?)";
17+
18+
impl Db {
19+
pub fn open<P>(path: P) -> CargoResult<&'static Mutex<Self>>
20+
where
21+
P: AsRef<Path>,
22+
{
23+
static DB: OnceCell<Mutex<Db>> = OnceCell::new();
24+
DB.get_or_try_init(|| {
25+
let conn = Connection::open(path.as_ref())?;
26+
conn.pragma_update(None, "locking_mode", &"EXCLUSIVE")?;
27+
conn.pragma_update(None, "cache_size", &2048)?;
28+
conn.execute(TABLE_SUMMARIES, [])?;
29+
Ok(Mutex::new(Self(conn)))
30+
})
31+
}
32+
33+
pub fn get<K>(&self, key: K) -> CargoResult<Vec<u8>>
34+
where
35+
K: AsRef<[u8]>,
36+
{
37+
let key = key.as_ref();
38+
Ok(self.0.query_row(
39+
"SELECT contents FROM summaries WHERE name = ? LIMIT 1",
40+
[key],
41+
|row| row.get(0),
42+
)?)
43+
}
44+
45+
pub fn insert<K>(&self, key: K, value: &[u8]) -> CargoResult<()>
46+
where
47+
K: AsRef<[u8]>,
48+
{
49+
let key = key.as_ref();
50+
let modified = self.0.execute(INSERT_SUMMERIES, params![key, value])?;
51+
log::debug!(
52+
"insert {} record for {}",
53+
modified,
54+
String::from_utf8_lossy(key)
55+
);
56+
Ok(())
57+
}
58+
}

src/cargo/sources/registry/index.rs

+54-86
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,14 @@
6868
6969
use crate::core::dependency::Dependency;
7070
use crate::core::{PackageId, SourceId, Summary};
71-
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
71+
use crate::sources::registry::{db::Db, RegistryData, RegistryPackage, INDEX_V_MAX};
7272
use crate::util::interning::InternedString;
7373
use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver};
74-
use anyhow::bail;
75-
use cargo_util::paths;
76-
use log::{debug, info};
7774
use semver::Version;
7875
use std::collections::{HashMap, HashSet};
79-
use std::convert::TryInto;
80-
use std::fs;
8176
use std::path::Path;
8277
use std::str;
78+
use std::sync::Mutex;
8379

8480
/// Crates.io treats hyphen and underscores as interchangeable, but the index and old Cargo do not.
8581
/// Therefore, the index must store uncanonicalized version of the name so old Cargo's can find it.
@@ -322,14 +318,14 @@ impl<'cfg> RegistryIndex<'cfg> {
322318
move |maybe| match maybe.parse(config, raw_data, source_id) {
323319
Ok(summary) => Some(summary),
324320
Err(e) => {
325-
info!("failed to parse `{}` registry package: {}", name, e);
321+
log::info!("failed to parse `{}` registry package: {}", name, e);
326322
None
327323
}
328324
},
329325
)
330326
.filter(move |is| {
331327
if is.v > max_version {
332-
debug!(
328+
log::debug!(
333329
"unsupported schema version {} ({} {})",
334330
is.v,
335331
is.summary.name(),
@@ -367,34 +363,29 @@ impl<'cfg> RegistryIndex<'cfg> {
367363
let cache_root = root.join(".cache");
368364
let index_version = load.current_version();
369365

366+
let db = index_version.and_then(|v| {
367+
let name = |v| format!("{}-{}-{}.sqlite", CURRENT_CACHE_VERSION, INDEX_V_MAX, v);
368+
let path = cache_root.join(&name(v));
369+
Db::open(path)
370+
.map_err(|e| log::debug!("failed to open registry db from {:?}: {}", name(v), e))
371+
.ok()
372+
});
373+
370374
// See module comment in `registry/mod.rs` for why this is structured
371375
// the way it is.
372-
let fs_name = name
376+
let pkg_name = name
373377
.chars()
374378
.flat_map(|c| c.to_lowercase())
375379
.collect::<String>();
376-
let raw_path = match fs_name.len() {
377-
1 => format!("1/{}", fs_name),
378-
2 => format!("2/{}", fs_name),
379-
3 => format!("3/{}/{}", &fs_name[..1], fs_name),
380-
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
381-
};
382380

383381
// Attempt to handle misspellings by searching for a chain of related
384382
// names to the original `pkg_name`. Only return summaries
385383
// associated with the first hit, however. The resolver will later
386384
// reject any candidates that have the wrong name, and with this it'll
387385
// along the way produce helpful "did you mean?" suggestions.
388-
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
389-
let summaries = Summaries::parse(
390-
index_version.as_deref(),
391-
root,
392-
&cache_root,
393-
path.as_ref(),
394-
self.source_id,
395-
load,
396-
self.config,
397-
)?;
386+
for pkg_name in UncanonicalizedIter::new(&pkg_name).take(1024) {
387+
let summaries =
388+
Summaries::parse(root, db, &pkg_name, self.source_id, load, self.config)?;
398389
if let Some(summaries) = summaries {
399390
self.summaries_cache.insert(name, summaries);
400391
return Ok(self.summaries_cache.get_mut(&name).unwrap());
@@ -520,46 +511,61 @@ impl Summaries {
520511
/// * `load` - the actual index implementation which may be very slow to
521512
/// call. We avoid this if we can.
522513
pub fn parse(
523-
index_version: Option<&str>,
524514
root: &Path,
525-
cache_root: &Path,
526-
relative: &Path,
515+
db: Option<&Mutex<Db>>,
516+
pkg_name: &str,
527517
source_id: SourceId,
528518
load: &mut dyn RegistryData,
529519
config: &Config,
530520
) -> CargoResult<Option<Summaries>> {
531521
// First up, attempt to load the cache. This could fail for all manner
532522
// of reasons, but consider all of them non-fatal and just log their
533523
// occurrence in case anyone is debugging anything.
534-
let cache_path = cache_root.join(relative);
535524
let mut cache_contents = None;
536-
if let Some(index_version) = index_version {
537-
match fs::read(&cache_path) {
538-
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
525+
526+
let db = db.and_then(|db| {
527+
db.lock()
528+
.map_err(|e| log::debug!("db mutex poisoned: {}", e))
529+
.ok()
530+
});
531+
532+
if let Some(db) = &db {
533+
match db.get(pkg_name) {
534+
Err(e) => log::debug!("cache missing for {:?} error: {}", pkg_name, e),
535+
Ok(contents) => match Summaries::parse_cache(contents) {
539536
Ok(s) => {
540-
log::debug!("fast path for registry cache of {:?}", relative);
537+
log::debug!("fast path for registry cache of {:?}", pkg_name);
541538
if cfg!(debug_assertions) {
542539
cache_contents = Some(s.raw_data);
543540
} else {
544541
return Ok(Some(s));
545542
}
546543
}
547544
Err(e) => {
548-
log::debug!("failed to parse {:?} cache: {}", relative, e);
545+
log::debug!("failed to parse {:?} cache: {}", pkg_name, e);
549546
}
550547
},
551-
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
552548
}
553549
}
554550

555551
// This is the fallback path where we actually talk to libgit2 to load
556552
// information. Here we parse every single line in the index (as we need
557553
// to find the versions)
558-
log::debug!("slow path for {:?}", relative);
554+
log::debug!("slow path for {:?}", pkg_name);
559555
let mut ret = Summaries::default();
560556
let mut hit_closure = false;
561557
let mut cache_bytes = None;
562-
let err = load.load(root, relative, &mut |contents| {
558+
559+
// See module comment in `registry/mod.rs` for why this is structured
560+
// the way it is.
561+
let relative = match pkg_name.len() {
562+
1 => format!("1/{}", pkg_name),
563+
2 => format!("2/{}", pkg_name),
564+
3 => format!("3/{}/{}", &pkg_name[..1], pkg_name),
565+
_ => format!("{}/{}/{}", &pkg_name[0..2], &pkg_name[2..4], pkg_name),
566+
};
567+
568+
let err = load.load(root, relative.as_ref(), &mut |contents| {
563569
ret.raw_data = contents.to_vec();
564570
let mut cache = SummariesCache::default();
565571
hit_closure = true;
@@ -588,8 +594,8 @@ impl Summaries {
588594
cache.versions.push((version.clone(), line));
589595
ret.versions.insert(version, summary.into());
590596
}
591-
if let Some(index_version) = index_version {
592-
cache_bytes = Some(cache.serialize(index_version));
597+
if db.is_some() {
598+
cache_bytes = Some(cache.serialize());
593599
}
594600
Ok(())
595601
});
@@ -624,13 +630,9 @@ impl Summaries {
624630
//
625631
// This is opportunistic so we ignore failure here but are sure to log
626632
// something in case of error.
627-
if let Some(cache_bytes) = cache_bytes {
628-
if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() {
629-
let path = Filesystem::new(cache_path.clone());
630-
config.assert_package_cache_locked(&path);
631-
if let Err(e) = fs::write(cache_path, cache_bytes) {
632-
log::info!("failed to write cache: {}", e);
633-
}
633+
if let (Some(cache_bytes), Some(db)) = (cache_bytes, db) {
634+
if let Err(e) = db.insert(pkg_name, cache_bytes.as_ref()) {
635+
log::info!("failed to write cache for {:?}: {}", pkg_name, e);
634636
}
635637
}
636638

@@ -639,8 +641,8 @@ impl Summaries {
639641

640642
/// Parses raw cache bytes which represent information previously cached by
641643
/// Cargo.
642-
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
643-
let cache = SummariesCache::parse(&contents, last_index_update)?;
644+
pub fn parse_cache(contents: Vec<u8>) -> CargoResult<Summaries> {
645+
let cache = SummariesCache::parse(&contents)?;
644646
let mut ret = Summaries::default();
645647
for (version, summary) in cache.versions {
646648
let (start, end) = subslice_bounds(&contents, summary);
@@ -704,40 +706,10 @@ impl Summaries {
704706
const CURRENT_CACHE_VERSION: u8 = 3;
705707

706708
impl<'a> SummariesCache<'a> {
707-
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
709+
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
708710
// NB: keep this method in sync with `serialize` below
709-
let (first_byte, rest) = data
710-
.split_first()
711-
.ok_or_else(|| anyhow::format_err!("malformed cache"))?;
712-
if *first_byte != CURRENT_CACHE_VERSION {
713-
bail!("looks like a different Cargo's cache, bailing out");
714-
}
715-
let index_v_bytes = rest
716-
.get(..4)
717-
.ok_or_else(|| anyhow::anyhow!("cache expected 4 bytes for index version"))?;
718-
let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap());
719-
if index_v != INDEX_V_MAX {
720-
bail!(
721-
"index format version {} doesn't match the version I know ({})",
722-
index_v,
723-
INDEX_V_MAX
724-
);
725-
}
726-
let rest = &rest[4..];
727-
728-
let mut iter = split(rest, 0);
729-
if let Some(update) = iter.next() {
730-
if update != last_index_update.as_bytes() {
731-
bail!(
732-
"cache out of date: current index ({}) != cache ({})",
733-
last_index_update,
734-
str::from_utf8(update)?,
735-
)
736-
}
737-
} else {
738-
bail!("malformed file");
739-
}
740711
let mut ret = SummariesCache::default();
712+
let mut iter = split(data, 0);
741713
while let Some(version) = iter.next() {
742714
let version = str::from_utf8(version)?;
743715
let version = Version::parse(version)?;
@@ -747,18 +719,14 @@ impl<'a> SummariesCache<'a> {
747719
Ok(ret)
748720
}
749721

750-
fn serialize(&self, index_version: &str) -> Vec<u8> {
722+
fn serialize(&self) -> Vec<u8> {
751723
// NB: keep this method in sync with `parse` above
752724
let size = self
753725
.versions
754726
.iter()
755-
.map(|(_version, data)| (10 + data.len()))
727+
.map(|(_version, data)| 10 + data.len())
756728
.sum();
757729
let mut contents = Vec::with_capacity(size);
758-
contents.push(CURRENT_CACHE_VERSION);
759-
contents.extend(&u32::to_le_bytes(INDEX_V_MAX));
760-
contents.extend_from_slice(index_version.as_bytes());
761-
contents.push(0);
762730
for (version, data) in self.versions.iter() {
763731
contents.extend_from_slice(version.to_string().as_bytes());
764732
contents.push(0);

src/cargo/sources/registry/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,7 @@ pub enum MaybeLock {
520520
Download { url: String, descriptor: String },
521521
}
522522

523+
mod db;
523524
mod index;
524525
mod local;
525526
mod remote;

0 commit comments

Comments
 (0)