Skip to content

Commit 5c43c5f

Browse files
committed
Add database fields such that the index can be generated from the database
1 parent d480f3a commit 5c43c5f

File tree

20 files changed

+422
-15
lines changed

20 files changed

+422
-15
lines changed

cargo-registry-index/lib.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ extern crate serde;
55
pub mod testing;
66

77
use anyhow::{anyhow, Context};
8-
use std::collections::HashMap;
8+
use std::collections::BTreeMap;
99
use std::io::Write;
1010
use std::path::{Path, PathBuf};
1111
use std::process::Command;
@@ -101,7 +101,7 @@ pub struct Crate {
101101
pub vers: String,
102102
pub deps: Vec<Dependency>,
103103
pub cksum: String,
104-
pub features: HashMap<String, Vec<String>>,
104+
pub features: BTreeMap<String, Vec<String>>,
105105
/// This field contains features with new, extended syntax. Specifically,
106106
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
107107
///
@@ -112,7 +112,7 @@ pub struct Crate {
112112
/// will fail to load due to not being able to parse the new syntax, even
113113
/// with a `Cargo.lock` file.
114114
#[serde(skip_serializing_if = "Option::is_none")]
115-
pub features2: Option<HashMap<String, Vec<String>>>,
115+
pub features2: Option<BTreeMap<String, Vec<String>>>,
116116
pub yanked: Option<bool>,
117117
#[serde(default)]
118118
pub links: Option<String>,
@@ -139,7 +139,7 @@ pub struct Crate {
139139
pub v: Option<u32>,
140140
}
141141

142-
#[derive(Serialize, Deserialize, Debug)]
142+
#[derive(Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq)]
143143
pub struct Dependency {
144144
pub name: String,
145145
pub req: String,
@@ -152,7 +152,7 @@ pub struct Dependency {
152152
pub package: Option<String>,
153153
}
154154

155-
#[derive(Copy, Clone, Serialize, Deserialize, Debug)]
155+
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, PartialOrd, Ord, Eq)]
156156
#[serde(rename_all = "lowercase")]
157157
pub enum DependencyKind {
158158
Normal,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
ALTER TABLE dependencies
2+
DROP COLUMN explicit_name;
3+
ALTER TABLE versions
4+
DROP COLUMN checksum,
5+
DROP COLUMN links;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
ALTER TABLE dependencies
2+
ADD COLUMN explicit_name VARCHAR NULL;
3+
ALTER TABLE versions
4+
ADD COLUMN checksum CHAR(64) NULL,
5+
ADD COLUMN links VARCHAR NULL;

src/admin/import_cksum.rs

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use std::{
2+
fs::File,
3+
io::{BufRead, BufReader},
4+
thread,
5+
time::Duration,
6+
};
7+
8+
use cargo_registry_index::{Repository, RepositoryConfig};
9+
use diesel::prelude::*;
10+
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
11+
12+
use crate::{
13+
admin::dialoguer,
14+
db,
15+
schema::{crates, dependencies, versions},
16+
};
17+
18+
#[derive(clap::Parser, Debug, Copy, Clone)]
19+
#[clap(
20+
name = "import-cksum",
21+
about = "Import missing fields from git into the database"
22+
)]
23+
pub struct Opts {
24+
/// Time in milliseconds to sleep between crate updates to reduce database load.
25+
#[clap(long)]
26+
pause_millis: u64,
27+
}
28+
29+
pub fn run(opts: Opts) -> anyhow::Result<()> {
30+
let conn = db::oneoff_connection().unwrap();
31+
println!("fetching git repo");
32+
let config = RepositoryConfig::from_environment();
33+
let repo = Repository::open(&config)?;
34+
repo.reset_head()?;
35+
println!("HEAD is at {}", repo.head_oid()?);
36+
37+
let files = repo.get_files_modified_since(None)?;
38+
println!("found {} crates", files.len());
39+
if !dialoguer::confirm("continue?") {
40+
return Ok(());
41+
}
42+
43+
let pb = ProgressBar::new(files.len() as u64);
44+
pb.set_style(ProgressStyle::with_template("{bar:60} ({pos}/{len}, ETA {eta})").unwrap());
45+
46+
for file in files.iter().progress_with(pb) {
47+
thread::sleep(Duration::from_millis(opts.pause_millis));
48+
49+
let crate_name = file.file_name().unwrap().to_str().unwrap();
50+
let path = repo.index_file(crate_name);
51+
if !path.exists() {
52+
continue;
53+
}
54+
let file = File::open(path)?;
55+
let reader = BufReader::new(file);
56+
for line in reader.lines() {
57+
let krate: cargo_registry_index::Crate = serde_json::from_str(&line?)?;
58+
conn.transaction(|| import_data(&conn, krate))?;
59+
}
60+
}
61+
62+
Ok(())
63+
}
64+
65+
fn import_data(conn: &PgConnection, krate: cargo_registry_index::Crate) -> QueryResult<()> {
66+
let (version_id, checksum): (i32, Option<String>) = versions::table
67+
.inner_join(crates::table)
68+
.filter(crates::name.eq(&krate.name))
69+
.filter(versions::num.eq(&krate.vers))
70+
.select((versions::id, versions::checksum))
71+
.first(conn)?;
72+
73+
if checksum.is_none() {
74+
// Update the `checksum` and `links` fields.
75+
diesel::update(versions::table)
76+
.set((
77+
versions::checksum.eq(&krate.cksum),
78+
versions::links.eq(&krate.links),
79+
))
80+
.filter(versions::id.eq(version_id))
81+
.execute(conn)?;
82+
// Update the `explicit_name` field for each dependency.
83+
for dep in &krate.deps {
84+
if let Some(package) = &dep.package {
85+
// This is a little tricky because there can be two identical deps in the
86+
// database. The only difference in git is the field we're trying to
87+
// fill (explicit_name). Using `first` here & filtering out existing `explicit_name`
88+
// entries ensure that we assign one explicit_name to each dep.
89+
let id: i32 = dependencies::table
90+
.inner_join(crates::table)
91+
.filter(dependencies::explicit_name.is_null())
92+
.filter(dependencies::version_id.eq(version_id))
93+
.filter(dependencies::req.eq(&dep.req))
94+
.filter(dependencies::features.eq(&dep.features))
95+
.filter(dependencies::optional.eq(&dep.optional))
96+
.filter(dependencies::default_features.eq(&dep.default_features))
97+
.filter(dependencies::target.is_not_distinct_from(&dep.target))
98+
.filter(dependencies::kind.eq(dep.kind.map(|k| k as i32).unwrap_or_default()))
99+
.filter(crates::name.eq(package))
100+
.select(dependencies::id)
101+
.first(conn)?;
102+
diesel::update(dependencies::table)
103+
.set(dependencies::explicit_name.eq(&dep.name))
104+
.filter(dependencies::id.eq(id))
105+
.execute(conn)?;
106+
}
107+
}
108+
}
109+
Ok(())
110+
}

src/admin/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
pub mod delete_crate;
22
pub mod delete_version;
33
pub mod dialoguer;
4+
pub mod import_cksum;
45
pub mod migrate;
56
pub mod on_call;
67
pub mod populate;

src/bin/crates-admin.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![warn(clippy::all, rust_2018_idioms)]
22

33
use cargo_registry::admin::{
4-
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
4+
delete_crate, delete_version, import_cksum, migrate, populate, render_readmes, test_pagerduty,
55
transfer_crates, upload_index, verify_token, yank_version,
66
};
77

@@ -24,6 +24,7 @@ enum SubCommand {
2424
Migrate(migrate::Opts),
2525
UploadIndex(upload_index::Opts),
2626
YankVersion(yank_version::Opts),
27+
ImportCksum(import_cksum::Opts),
2728
}
2829

2930
fn main() -> anyhow::Result<()> {
@@ -42,6 +43,7 @@ fn main() -> anyhow::Result<()> {
4243
SubCommand::Migrate(opts) => migrate::run(opts)?,
4344
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
4445
SubCommand::YankVersion(opts) => yank_version::run(opts),
46+
SubCommand::ImportCksum(opts) => import_cksum::run(opts)?,
4547
}
4648

4749
Ok(())

src/controllers/krate/metadata.rs

+58
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
use std::cmp::Reverse;
88
use std::str::FromStr;
99

10+
use conduit::{Body, Response};
11+
1012
use crate::controllers::frontend_prelude::*;
1113
use crate::controllers::helpers::pagination::PaginationOptions;
1214

@@ -15,6 +17,7 @@ use crate::models::{
1517
TopVersions, User, Version, VersionOwnerAction,
1618
};
1719
use crate::schema::*;
20+
use crate::util::errors::not_found;
1821
use crate::views::{
1922
EncodableCategory, EncodableCrate, EncodableDependency, EncodableKeyword, EncodableVersion,
2023
};
@@ -396,3 +399,58 @@ pub fn reverse_dependencies(req: &mut dyn RequestExt) -> EndpointResult {
396399
"meta": { "total": total },
397400
})))
398401
}
402+
403+
/// Generate the sparse registry config.json file
404+
pub fn config_json(req: &mut dyn RequestExt) -> EndpointResult {
405+
let headers = req.headers();
406+
let proto = headers
407+
.get("X-Forwarded-Proto")
408+
.and_then(|v| v.to_str().ok())
409+
.unwrap_or_else(|| {
410+
if req.app().config.env() == crate::Env::Development {
411+
"http"
412+
} else {
413+
"https"
414+
}
415+
});
416+
let domain_name = headers
417+
.get("X-Forwarded-Host")
418+
.or_else(|| headers.get(http::header::HOST))
419+
.and_then(|v| v.to_str().ok())
420+
.unwrap_or_else(|| &req.app().config.domain_name);
421+
422+
let dl = format!("{proto}://{domain_name}/api/v1/crates");
423+
let api = format!("{proto}://{domain_name}/");
424+
425+
#[derive(Serialize)]
426+
struct R {
427+
dl: String,
428+
api: String,
429+
}
430+
Ok(req.json(&R { dl, api }))
431+
}
432+
433+
/// Generate a sparse registry index file
434+
pub fn versions_registry(req: &mut dyn RequestExt) -> EndpointResult {
435+
let crate_name = &req.params()["crate_id"];
436+
437+
let x1 = req.params()["x1"].as_str();
438+
let x2 = req.params().find("x2");
439+
if (x1, x2) != match crate_name.len() {
440+
1 => ("1", None),
441+
2 => ("2", None),
442+
3 => ("3", Some(&crate_name[0..1])),
443+
_ => (&crate_name[0..2], Some(&crate_name[2..4])),
444+
} {
445+
return Err(not_found())
446+
}
447+
448+
let conn = req.db_read()?;
449+
let krate: Crate = Crate::by_name(crate_name).first(&*conn)?;
450+
let body = krate.index_metadata(&*conn)?;
451+
452+
Ok(Response::builder()
453+
.header(header::CONTENT_TYPE, "text/plain; charset=utf-8")
454+
.body(Body::from_vec(body))
455+
.unwrap()) // Header values are well formed, so should not panic
456+
}

src/controllers/krate/publish.rs

+10-6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use flate2::read::GzDecoder;
44
use hex::ToHex;
55
use sha2::{Digest, Sha256};
6-
use std::collections::HashMap;
6+
use std::collections::BTreeMap;
77
use std::io::Read;
88
use std::path::Path;
99
use std::sync::Arc;
@@ -154,6 +154,11 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
154154
// This is only redundant for now. Eventually the duplication will be removed.
155155
let license = new_crate.license.clone();
156156

157+
// Read tarball from request
158+
let mut tarball = Vec::new();
159+
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
160+
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
161+
157162
// Persist the new version of this crate
158163
let version = NewVersion::new(
159164
krate.id,
@@ -165,6 +170,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
165170
// to get here, and max upload sizes are way less than i32 max
166171
file_length as i32,
167172
user.id,
173+
hex_cksum.clone(),
174+
links.clone(),
168175
)?
169176
.save(&conn, &verified_email_address)?;
170177

@@ -191,10 +198,6 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
191198
let ignored_invalid_badges = Badge::update_crate(&conn, &krate, new_crate.badges.as_ref())?;
192199
let top_versions = krate.top_versions(&conn)?;
193200

194-
// Read tarball from request
195-
let mut tarball = Vec::new();
196-
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
197-
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
198201
let pkg_name = format!("{}-{}", krate.name, vers);
199202
let cargo_vcs_info = verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
200203
let pkg_path_in_vcs = cargo_vcs_info.map(|info| info.path_in_vcs);
@@ -217,7 +220,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
217220
.uploader()
218221
.upload_crate(app.http_client(), tarball, &krate, vers)?;
219222

220-
let (features, features2): (HashMap<_, _>, HashMap<_, _>) =
223+
let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) =
221224
features.into_iter().partition(|(_k, vals)| {
222225
!vals
223226
.iter()
@@ -367,6 +370,7 @@ pub fn add_dependencies(
367370
default_features.eq(dep.default_features),
368371
features.eq(&dep.features),
369372
target.eq(dep.target.as_deref()),
373+
explicit_name.eq(dep.explicit_name_in_toml.as_deref())
370374
),
371375
))
372376
})

src/downloads_counter.rs

+2
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,8 @@ mod tests {
457457
None,
458458
0,
459459
self.user.id,
460+
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
461+
None,
460462
)
461463
.expect("failed to create version")
462464
.save(conn, "[email protected]")

src/models/dependency.rs

+11
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub struct Dependency {
2020
pub features: Vec<String>,
2121
pub target: Option<String>,
2222
pub kind: DependencyKind,
23+
pub explicit_name: Option<String>,
2324
}
2425

2526
#[derive(Debug, QueryableByName)]
@@ -43,6 +44,16 @@ pub enum DependencyKind {
4344
// if you add a kind here, be sure to update `from_row` below.
4445
}
4546

47+
impl From<cargo_registry_index::DependencyKind> for DependencyKind {
48+
fn from(dk: cargo_registry_index::DependencyKind) -> Self {
49+
match dk {
50+
cargo_registry_index::DependencyKind::Normal => DependencyKind::Normal,
51+
cargo_registry_index::DependencyKind::Build => DependencyKind::Build,
52+
cargo_registry_index::DependencyKind::Dev => DependencyKind::Dev,
53+
}
54+
}
55+
}
56+
4657
impl From<DependencyKind> for IndexDependencyKind {
4758
fn from(dk: DependencyKind) -> Self {
4859
match dk {

0 commit comments

Comments
 (0)