Skip to content

Commit 5709b8c

Browse files
committed
Upload index metadata to S3 when publishing new crates
Also provides a new admin tool to bulk upload existing index files.
1 parent d4c7512 commit 5709b8c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+3043
-27
lines changed

.env.sample

+8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ export TEST_DATABASE_URL=
3333
# not needed if the S3 bucket is in US standard
3434
# export S3_REGION=
3535

36+
# Credentials for uploading index metadata to S3. You can leave these commented
37+
# out if you're not publishing index metadata to S3 from your crates.io instance.
38+
# export S3_INDEX_BUCKET=
39+
# export S3_INDEX_ACCESS_KEY=
40+
# export S3_INDEX_SECRET_KEY=
41+
# not needed if the S3 bucket is in US standard
42+
# export S3_INDEX_REGION=
43+
3644
# Upstream location of the registry index. Background jobs will push to
3745
# this URL. The default points to a local index for development.
3846
# Run `./script/init-local-index.sh` to initialize this repo.

cargo-registry-index/lib.rs

+66-7
Original file line numberDiff line numberDiff line change
@@ -293,18 +293,33 @@ impl Repository {
293293
.join(Self::relative_index_file(name))
294294
}
295295

296+
/// Splits a crate name into the components of its relative index file path.
///
/// The layout is chosen by the byte length of `name`:
/// - 1 byte: `["1", name]`
/// - 2 bytes: `["2", name]`
/// - 3 bytes: `["3", <first byte of name>, name]`
/// - anything else: `[<bytes 0..2>, <bytes 2..4>, name]`
///
/// Does not perform conversion to lowercase.
///
/// # Panics
///
/// Panics if `name` is empty, or if one of the byte ranges above does not fall on a
/// UTF-8 character boundary of `name`.
fn relative_index_file_helper(name: &str) -> Vec<&str> {
    let byte_len = name.len();

    if byte_len == 1 {
        return vec!["1", name];
    }
    if byte_len == 2 {
        return vec!["2", name];
    }
    if byte_len == 3 {
        return vec!["3", &name[..1], name];
    }

    // Every other length (including zero, which panics on the slice below) uses
    // the first two and the next two bytes as directory components.
    vec![&name[0..2], &name[2..4], name]
}
306+
296307
/// Returns the relative path to the crate index file that corresponds to
297-
/// the given crate name.
308+
/// the given crate name as a path (i.e. with platform-dependent folder separators).
298309
///
299310
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
300311
pub fn relative_index_file(name: &str) -> PathBuf {
301312
let name = name.to_lowercase();
302-
match name.len() {
303-
1 => Path::new("1").join(&name),
304-
2 => Path::new("2").join(&name),
305-
3 => Path::new("3").join(&name[..1]).join(&name),
306-
_ => Path::new(&name[0..2]).join(&name[2..4]).join(&name),
307-
}
313+
Self::relative_index_file_helper(&name).iter().collect()
314+
}
315+
316+
/// Returns the relative path to the crate index file that corresponds to
317+
/// the given crate name for usage in URLs (i.e. with `/` separator).
318+
///
319+
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
320+
pub fn relative_index_file_for_url(name: &str) -> String {
321+
let name = name.to_lowercase();
322+
Self::relative_index_file_helper(&name).join("/")
308323
}
309324

310325
/// Returns the [Object ID](git2::Oid) of the currently checked out commit
@@ -343,6 +358,50 @@ impl Repository {
343358
self.push("refs/heads/master")
344359
}
345360

361+
/// Gets a list of files that have been modified since a given `starting_commit`
362+
/// (use `starting_commit = None` for a list of all files).
363+
pub fn get_files_modified_since(
364+
&self,
365+
starting_commit: Option<&str>,
366+
) -> anyhow::Result<Vec<PathBuf>> {
367+
let starting_commit = match starting_commit {
368+
Some(starting_commit) => {
369+
let oid = git2::Oid::from_str(starting_commit)
370+
.context("failed to parse commit into Oid")?;
371+
let commit = self
372+
.repository
373+
.find_commit(oid)
374+
.context("failed to find commit")?;
375+
Some(
376+
commit
377+
.as_object()
378+
.peel_to_tree()
379+
.context("failed to find tree for commit")?,
380+
)
381+
}
382+
None => None,
383+
};
384+
385+
let head = self
386+
.repository
387+
.find_commit(self.head_oid()?)?
388+
.as_object()
389+
.peel_to_tree()
390+
.context("failed to find tree for HEAD")?;
391+
let diff = self
392+
.repository
393+
.diff_tree_to_tree(starting_commit.as_ref(), Some(&head), None)
394+
.context("failed to run diff")?;
395+
let files = diff
396+
.deltas()
397+
.map(|delta| delta.new_file())
398+
.filter(|file| file.exists())
399+
.map(|file| file.path().unwrap().to_path_buf())
400+
.collect();
401+
402+
Ok(files)
403+
}
404+
346405
/// Push the current branch to the provided refname
347406
fn push(&self, refspec: &str) -> anyhow::Result<()> {
348407
let mut ref_status = Ok(());

src/admin/delete_crate.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
use crate::{admin::dialoguer, db, models::Crate, schema::crates};
1+
use crate::{admin::dialoguer, config, db, models::Crate, schema::crates};
22

33
use diesel::prelude::*;
4+
use reqwest::blocking::Client;
45

56
#[derive(clap::Parser, Debug)]
67
#[clap(
@@ -25,6 +26,10 @@ pub fn run(opts: Opts) {
2526
fn delete(opts: Opts, conn: &PgConnection) {
2627
let krate: Crate = Crate::by_name(&opts.crate_name).first(conn).unwrap();
2728

29+
let config = config::Base::from_environment();
30+
let uploader = config.uploader();
31+
let client = Client::new();
32+
2833
let prompt = format!(
2934
"Are you sure you want to delete {} ({})?",
3035
opts.crate_name, krate.id
@@ -42,4 +47,6 @@ fn delete(opts: Opts, conn: &PgConnection) {
4247
if !dialoguer::confirm("commit?") {
4348
panic!("aborting transaction");
4449
}
50+
51+
uploader.delete_index(&client, &krate.name).unwrap();
4552
}

src/admin/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ pub mod populate;
77
pub mod render_readmes;
88
pub mod test_pagerduty;
99
pub mod transfer_crates;
10+
pub mod upload_index;
1011
pub mod verify_token;

src/admin/upload_index.rs

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
use std::time::{Duration, Instant};
2+
3+
use crate::admin::dialoguer;
4+
use cargo_registry_index::{Repository, RepositoryConfig};
5+
use reqwest::blocking::Client;
6+
7+
use crate::config;
8+
9+
#[derive(clap::Parser, Debug)]
10+
#[clap(
11+
name = "upload-index",
12+
about = "Upload index from git to S3 (http-based index)"
13+
)]
14+
pub struct Opts {
15+
/// Incremental commit. Any changed files made after this commit will be uploaded.
16+
incremental_commit: Option<String>,
17+
}
18+
19+
pub fn run(opts: Opts) -> anyhow::Result<()> {
20+
let config = config::Base::from_environment();
21+
let uploader = config.uploader();
22+
let client = Client::new();
23+
24+
println!("fetching git repo");
25+
let config = RepositoryConfig::from_environment();
26+
let repo = Repository::open(&config)?;
27+
repo.reset_head()?;
28+
println!("HEAD is at {}", repo.head_oid()?);
29+
30+
let files = repo.get_files_modified_since(opts.incremental_commit.as_deref())?;
31+
println!("found {} files to upload", files.len());
32+
if !dialoguer::confirm("continue with upload?") {
33+
return Ok(());
34+
}
35+
36+
let mut progress_update_time = Instant::now();
37+
for (i, file) in files.iter().enumerate() {
38+
let crate_name = file.file_name().unwrap().to_str().unwrap();
39+
let path = repo.index_file(crate_name);
40+
if !path.exists() {
41+
println!("skipping file `{}`", crate_name);
42+
continue;
43+
}
44+
let contents = std::fs::read_to_string(&path)?;
45+
uploader.upload_index(&client, crate_name, contents)?;
46+
47+
// Print a progress update every 10 seconds.
48+
let now = Instant::now();
49+
if now - progress_update_time > Duration::from_secs(10) {
50+
progress_update_time = now;
51+
println!("uploading {}/{}", i, files.len());
52+
}
53+
}
54+
55+
println!(
56+
"uploading completed; use `upload-index {}` for an incremental run",
57+
repo.head_oid()?
58+
);
59+
Ok(())
60+
}

src/bin/crates-admin.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use cargo_registry::admin::{
44
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
5-
transfer_crates, verify_token,
5+
transfer_crates, upload_index, verify_token,
66
};
77

88
#[derive(clap::Parser, Debug)]
@@ -22,6 +22,7 @@ enum SubCommand {
2222
TransferCrates(transfer_crates::Opts),
2323
VerifyToken(verify_token::Opts),
2424
Migrate(migrate::Opts),
25+
UploadIndex(upload_index::Opts),
2526
}
2627

2728
fn main() -> anyhow::Result<()> {
@@ -38,6 +39,7 @@ fn main() -> anyhow::Result<()> {
3839
SubCommand::TransferCrates(opts) => transfer_crates::run(opts),
3940
SubCommand::VerifyToken(opts) => verify_token::run(opts).unwrap(),
4041
SubCommand::Migrate(opts) => migrate::run(opts)?,
42+
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
4143
}
4244

4345
Ok(())

src/config/base.rs

+37-6
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,24 @@ impl Base {
7474

7575
pub fn test() -> Self {
7676
let uploader = Uploader::S3 {
77-
bucket: s3::Bucket::new(
77+
bucket: Box::new(s3::Bucket::new(
7878
String::from("alexcrichton-test"),
7979
None,
8080
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
8181
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
8282
// When testing we route all API traffic over HTTP so we can
8383
// sniff/record it, but everywhere else we use https
8484
"http",
85-
),
85+
)),
86+
index_bucket: Some(Box::new(s3::Bucket::new(
87+
String::from("alexcrichton-test"),
88+
None,
89+
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
90+
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
91+
// When testing we route all API traffic over HTTP so we can
92+
// sniff/record it, but everywhere else we use https
93+
"http",
94+
))),
8695
cdn: None,
8796
};
8897
Self {
@@ -96,27 +105,49 @@ impl Base {
96105
}
97106

98107
fn s3_panic_if_missing_keys() -> Uploader {
108+
let index_bucket = match dotenv::var("S3_INDEX_BUCKET") {
109+
Ok(name) => Some(Box::new(s3::Bucket::new(
110+
name,
111+
dotenv::var("S3_INDEX_REGION").ok(),
112+
env("S3_INDEX_ACCESS_KEY"),
113+
env("S3_INDEX_SECRET_KEY"),
114+
"https",
115+
))),
116+
Err(_) => None,
117+
};
99118
Uploader::S3 {
100-
bucket: s3::Bucket::new(
119+
bucket: Box::new(s3::Bucket::new(
101120
env("S3_BUCKET"),
102121
dotenv::var("S3_REGION").ok(),
103122
env("S3_ACCESS_KEY"),
104123
env("S3_SECRET_KEY"),
105124
"https",
106-
),
125+
)),
126+
index_bucket,
107127
cdn: dotenv::var("S3_CDN").ok(),
108128
}
109129
}
110130

111131
fn s3_maybe_read_only() -> Uploader {
132+
let index_bucket = match dotenv::var("S3_INDEX_BUCKET") {
133+
Ok(name) => Some(Box::new(s3::Bucket::new(
134+
name,
135+
dotenv::var("S3_INDEX_REGION").ok(),
136+
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
137+
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
138+
"https",
139+
))),
140+
Err(_) => None,
141+
};
112142
Uploader::S3 {
113-
bucket: s3::Bucket::new(
143+
bucket: Box::new(s3::Bucket::new(
114144
env("S3_BUCKET"),
115145
dotenv::var("S3_REGION").ok(),
116146
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
117147
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
118148
"https",
119-
),
149+
)),
150+
index_bucket,
120151
cdn: dotenv::var("S3_CDN").ok(),
121152
}
122153
}

src/controllers/krate/publish.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
215215
// Upload crate tarball
216216
app.config
217217
.uploader()
218-
.upload_crate(&app, tarball, &krate, vers)?;
218+
.upload_crate(app.http_client(), tarball, &krate, vers)?;
219219

220220
let (features, features2): (HashMap<_, _>, HashMap<_, _>) =
221221
features.into_iter().partition(|(_k, vals)| {

src/tests/http-data/krate_publish_features_version_2

+67
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,72 @@
6565
],
6666
"body": ""
6767
}
68+
},
69+
{
70+
"request": {
71+
"uri": "http://alexcrichton-test.s3.amazonaws.com/index/3/f/foo",
72+
"method": "PUT",
73+
"headers": [
74+
[
75+
"accept-encoding",
76+
"gzip"
77+
],
78+
[
79+
"accept",
80+
"*/*"
81+
],
82+
[
83+
"content-length",
84+
"336"
85+
],
86+
[
87+
"date",
88+
"Fri, 15 Sep 2017 07:53:06 -0700"
89+
],
90+
[
91+
"authorization",
92+
"AWS AKIAICL5IWUZYWWKA7JA:uDc39eNdF6CcwB+q+JwKsoDLQc4="
93+
],
94+
[
95+
"content-type",
96+
"text/plain"
97+
],
98+
[
99+
"host",
100+
"alexcrichton-test.s3.amazonaws.com"
101+
]
102+
],
103+
"body": "eyJuYW1lIjoiZm9vIiwidmVycyI6IjEuMC4wIiwiZGVwcyI6W3sibmFtZSI6ImJhciIsInJlcSI6Ij4gMCIsImZlYXR1cmVzIjpbXSwib3B0aW9uYWwiOmZhbHNlLCJkZWZhdWx0X2ZlYXR1cmVzIjp0cnVlLCJ0YXJnZXQiOm51bGwsImtpbmQiOiJub3JtYWwifV0sImNrc3VtIjoiYWNiNTYwNGIxMjZhYzg5NGMxZWIxMWM0NTc1YmYyMDcyZmVhNjEyMzJhODg4ZTQ1Mzc3MGM3OWQ3ZWQ1NjQxOSIsImZlYXR1cmVzIjp7Im9sZF9mZWF0IjpbXX0sImZlYXR1cmVzMiI6eyJuZXdfZmVhdCI6WyJkZXA6YmFyIiwiYmFyPy9mZWF0Il19LCJ5YW5rZWQiOmZhbHNlLCJsaW5rcyI6bnVsbCwidiI6Mn0K"
104+
},
105+
"response": {
106+
"status": 200,
107+
"headers": [
108+
[
109+
"x-amz-request-id",
110+
"26589A5E52F8395C"
111+
],
112+
[
113+
"x-amz-id-2",
114+
"JdIvnNTw53aqXjBIqBLNuN4kxf/w1XWX+xuIiGBDYy7yzOSDuAMtBSrTW4ZWetcCIdqCUHuQ51A="
115+
],
116+
[
117+
"content-length",
118+
"0"
119+
],
120+
[
121+
"Server",
122+
"AmazonS3"
123+
],
124+
[
125+
"date",
126+
"Fri,15 Sep 2017 14:53:07 GMT"
127+
],
128+
[
129+
"ETag",
130+
"\"f9016ad360cebb4fe2e6e96e5949f022\""
131+
]
132+
],
133+
"body": ""
134+
}
68135
}
69136
]

0 commit comments

Comments
 (0)