Skip to content

Commit 98ea30e

Browse files
authored
Add tool to import from git index into the database (#5112)
1 parent 58e505f commit 98ea30e

File tree

3 files changed

+114
-1
lines changed

3 files changed

+114
-1
lines changed

src/admin/git_import.rs

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use std::{
2+
fs::File,
3+
io::{BufRead, BufReader},
4+
thread,
5+
time::Duration,
6+
};
7+
8+
use anyhow::Context;
9+
use cargo_registry_index::{Repository, RepositoryConfig};
10+
use diesel::prelude::*;
11+
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
12+
13+
use crate::{
14+
admin::dialoguer,
15+
db,
16+
schema::{crates, dependencies, versions},
17+
};
18+
19+
#[derive(clap::Parser, Debug, Copy, Clone)]
20+
#[clap(
21+
name = "git-import",
22+
about = "Import missing fields from git into the database"
23+
)]
24+
pub struct Opts {
25+
/// Time in milliseconds to sleep between crate updates to reduce database load.
26+
#[clap(long)]
27+
delay: u64,
28+
}
29+
30+
pub fn run(opts: Opts) -> anyhow::Result<()> {
31+
let conn = db::oneoff_connection().unwrap();
32+
println!("fetching git repo");
33+
let config = RepositoryConfig::from_environment();
34+
let repo = Repository::open(&config)?;
35+
repo.reset_head()?;
36+
println!("HEAD is at {}", repo.head_oid()?);
37+
let files = repo.get_files_modified_since(None)?;
38+
println!("found {} crates", files.len());
39+
if !dialoguer::confirm("continue?") {
40+
return Ok(());
41+
}
42+
43+
let pb = ProgressBar::new(files.len() as u64);
44+
pb.set_style(ProgressStyle::with_template("{bar:60} ({pos}/{len}, ETA {eta})").unwrap());
45+
46+
for file in files.iter().progress_with(pb) {
47+
thread::sleep(Duration::from_millis(opts.delay));
48+
let crate_name = file.file_name().unwrap().to_str().unwrap();
49+
let path = repo.index_file(crate_name);
50+
if !path.exists() {
51+
continue;
52+
}
53+
let file = File::open(path)?;
54+
let reader = BufReader::new(file);
55+
for line in reader.lines() {
56+
let krate: cargo_registry_index::Crate = serde_json::from_str(&line?)?;
57+
conn.transaction(|| {
58+
import_data(&conn, &krate)
59+
.with_context(|| format!("failed to update crate: {krate:?}"))
60+
})?;
61+
}
62+
}
63+
64+
Ok(())
65+
}
66+
67+
fn import_data(conn: &PgConnection, krate: &cargo_registry_index::Crate) -> QueryResult<()> {
68+
let version_id: i32 = versions::table
69+
.inner_join(crates::table)
70+
.filter(crates::name.eq(&krate.name))
71+
.filter(versions::num.eq(&krate.vers))
72+
.select(versions::id)
73+
.first(conn)?;
74+
75+
// Update the `checksum` and `links` fields.
76+
diesel::update(versions::table)
77+
.set((
78+
versions::checksum.eq(&krate.cksum),
79+
versions::links.eq(&krate.links),
80+
))
81+
.filter(versions::id.eq(version_id))
82+
.execute(conn)?;
83+
// Update the `explicit_name` field for each dependency.
84+
for dep in &krate.deps {
85+
if let Some(package) = &dep.package {
86+
// This is a little tricky because there can be two identical deps in the
87+
// database. The only difference in git is the field we're trying to
88+
// fill (explicit_name). Using `first` here & filtering out existing `explicit_name`
89+
// entries ensure that we assign one explicit_name to each dep.
90+
let id: i32 = dependencies::table
91+
.inner_join(crates::table)
92+
.filter(dependencies::explicit_name.is_null())
93+
.filter(dependencies::version_id.eq(version_id))
94+
.filter(dependencies::req.eq(&dep.req))
95+
.filter(dependencies::features.eq(&dep.features))
96+
.filter(dependencies::optional.eq(&dep.optional))
97+
.filter(dependencies::default_features.eq(&dep.default_features))
98+
.filter(dependencies::target.is_not_distinct_from(&dep.target))
99+
.filter(dependencies::kind.eq(dep.kind.map(|k| k as i32).unwrap_or_default()))
100+
.filter(crates::name.eq(package))
101+
.select(dependencies::id)
102+
.first(conn)?;
103+
diesel::update(dependencies::table)
104+
.set(dependencies::explicit_name.eq(&dep.name))
105+
.filter(dependencies::id.eq(id))
106+
.execute(conn)?;
107+
}
108+
}
109+
Ok(())
110+
}

src/admin/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
pub mod delete_crate;
22
pub mod delete_version;
33
pub mod dialoguer;
4+
pub mod git_import;
45
pub mod migrate;
56
pub mod on_call;
67
pub mod populate;

src/bin/crates-admin.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![warn(clippy::all, rust_2018_idioms)]
22

33
use cargo_registry::admin::{
4-
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
4+
delete_crate, delete_version, git_import, migrate, populate, render_readmes, test_pagerduty,
55
transfer_crates, upload_index, verify_token, yank_version,
66
};
77

@@ -24,6 +24,7 @@ enum SubCommand {
2424
Migrate(migrate::Opts),
2525
UploadIndex(upload_index::Opts),
2626
YankVersion(yank_version::Opts),
27+
GitImport(git_import::Opts),
2728
}
2829

2930
fn main() -> anyhow::Result<()> {
@@ -42,6 +43,7 @@ fn main() -> anyhow::Result<()> {
4243
SubCommand::Migrate(opts) => migrate::run(opts)?,
4344
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
4445
SubCommand::YankVersion(opts) => yank_version::run(opts),
46+
SubCommand::GitImport(opts) => git_import::run(opts)?,
4547
}
4648

4749
Ok(())

0 commit comments

Comments
 (0)