Commit a6de1e1

Auto merge of #1681 - sgrif:sg-rate-limit-publish, r=jtgeibel
Add more aggressive rate limiting for publishing new crates

This is still incomplete, but the bulk of the code has been written, so I figured I'd get some eyes on it. Right now this just panics instead of returning an error if the user is out of tokens. Still left to do:

- The two ignored test cases
- Implementing the actual error type
- Per-user burst rate overrides
- A cron job to restrict the table size and clean up stale buckets. (I probably won't land this in the initial PR -- our users table needs to grow by two orders of magnitude for this to really matter -- but I do want to land it as a follow-up PR, since I haven't tested cases where now - last_update is greater than a month. It should work fine, but I'd rather not have it run against poorly defined semantics.)

I think the limit we'll probably set to start is 1 request per 10 seconds with a burst of 30. The error message will tell folks they can either wait for {time until next token} or email us to get the limit increased for them. This is limited per user instead of per IP, since rotating your user is harder than rotating your IP. It's stored in the DB since this only applies to publishing new crates, which is slow enough already that the DB load of rate limiting shouldn't matter.

I needed to update to Rust 1.33 to get `Duration::as_millis` (note: the way we're using this feature causes UB if the rate limit is slower than 1 request per 292471208 years. I assume this is not a problem). I needed to update to Diesel 1.4.2 to get a fix for diesel-rs/diesel#2017.

The algorithm used is pretty much the standard token bucket algorithm. It's *slightly* different in that we set `tokens = max(0, tokens - 1) + tokens_to_add` instead of `tokens = max(0, tokens + tokens_to_add - 1)`. This is because the usual implementation checks available tokens before subtracting them (and thus never persists if there aren't enough tokens available). Since we're doing this in a single query, and we can *only* return the final, persisted value, we have to change the calculation slightly to make sure that a user who is out of tokens gets `1` back once the rate limit interval has elapsed.

A side effect of all this is that our token count is actually offset by 1. 0 means the user is not only out of tokens, but that we just tried to take a token and couldn't. 1 means an empty bucket, and a full bucket would technically be burst + 1. The alternative would be -1 meaning the user is actually out of tokens, but since we only ever refill the bucket when we're trying to take a token, we never actually persist a full bucket. I figured a range of 0...burst made more sense than -1..burst.
2 parents e8130f3 + f597a3f commit a6de1e1
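To make the single-query calculation in the commit message concrete, here is a minimal Rust sketch of the value that would end up persisted on each attempted publish. The function name, the cap at `burst`, and the millisecond arithmetic are illustrative assumptions; in the PR itself the whole thing happens inside one database statement.

    use std::time::Duration;

    /// Illustrative sketch only. `tokens` uses the offset-by-1 convention from
    /// the commit message: 0 = we just tried to take a token and couldn't,
    /// 1 = an empty bucket, and burst + 1 would be a (never persisted) full bucket.
    fn take_token(tokens: i32, elapsed: Duration, rate: Duration, burst: i32) -> i32 {
        // Whole tokens accrued since the last refill.
        let tokens_to_add = (elapsed.as_millis() / rate.as_millis()) as i32;

        // max(0, tokens - 1) + tokens_to_add: take the token *before* refilling,
        // so a user sitting at 0 climbs back to 1 ("allowed") as soon as one
        // refill interval has passed. Capping the persisted, post-take value at
        // `burst` is an assumption here.
        ((tokens - 1).max(0) + tokens_to_add).min(burst)
    }

    // With 1 token per 10 seconds and a burst of 30: a user who just hit the
    // limit (0) and waits 10 seconds gets 1 back, whereas the usual
    // max(0, tokens + tokens_to_add - 1) would leave them stuck at 0.
    // take_token(0, Duration::from_secs(10), Duration::from_secs(10), 30) == 1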

23 files changed, +935 -83 lines


RustConfig (+1 -1)

@@ -1 +1 @@
-VERSION=1.32.0
+VERSION=1.33.0

@@ -0,0 +1 @@
+DROP TABLE publish_limit_buckets;

@@ -0,0 +1,5 @@
+CREATE TABLE publish_limit_buckets(
+    user_id INTEGER PRIMARY KEY NOT NULL REFERENCES users,
+    tokens INTEGER NOT NULL,
+    last_refill TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+);

@@ -0,0 +1 @@
+DROP TABLE publish_rate_overrides;

@@ -0,0 +1,4 @@
+CREATE TABLE publish_rate_overrides (
+    user_id INTEGER PRIMARY KEY REFERENCES users,
+    burst INTEGER NOT NULL
+);
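The two new tables above work together: publish_limit_buckets stores one token bucket row per user, while publish_rate_overrides lets an individual user's burst size be raised (one of the follow-ups listed in the commit message). A minimal sketch of how a burst lookup could fall back to the configured default, assuming Diesel's generated schema module and a hypothetical default_burst parameter:

    use diesel::prelude::*;

    use crate::schema::publish_rate_overrides;

    /// Hypothetical helper: prefer the per-user override row if one exists,
    /// otherwise use the globally configured burst size.
    fn burst_for_user(
        conn: &PgConnection,
        user_id: i32,
        default_burst: i32,
    ) -> QueryResult<i32> {
        let override_burst = publish_rate_overrides::table
            .find(user_id)
            .select(publish_rate_overrides::burst)
            .first::<i32>(conn)
            .optional()?; // None when the user has no override row
        Ok(override_burst.unwrap_or(default_burst))
    }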

src/bin/update-downloads.rs (+1 -1)

@@ -107,7 +107,7 @@ mod test {
             name: "foo",
             ..Default::default()
         }
-        .create_or_update(conn, None, user_id)
+        .create_or_update(conn, None, user_id, None)
         .unwrap();
         let version = NewVersion::new(
             krate.id,

src/config.rs (+3)

@@ -1,3 +1,4 @@
+use crate::publish_rate_limit::PublishRateLimit;
 use crate::{env, uploaders::Uploader, Env, Replica};
 use std::path::PathBuf;
 use url::Url;
@@ -16,6 +17,7 @@ pub struct Config {
     pub max_unpack_size: u64,
     pub mirror: Replica,
     pub api_protocol: String,
+    pub publish_rate_limit: PublishRateLimit,
 }
 
 impl Default for Config {
@@ -132,6 +134,7 @@ impl Default for Config {
             max_unpack_size: 512 * 1024 * 1024, // 512 MB max when decompressed
             mirror,
             api_protocol,
+            publish_rate_limit: Default::default(),
         }
     }
 }
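The new src/publish_rate_limit.rs module itself is not shown in this view. Going only by the commit message (1 request per 10 seconds with a burst of 30, overridable per user), the configuration side of it might look roughly like the sketch below; the field names rate and burst are assumptions, not the PR's actual API.

    use std::time::Duration;

    /// Assumed shape of the value carried in `Config::publish_rate_limit`.
    #[derive(Debug, Clone, Copy, PartialEq)]
    pub struct PublishRateLimit {
        pub rate: Duration, // one token refilled per `rate`
        pub burst: i32,     // default bucket size, overridable per user
    }

    impl Default for PublishRateLimit {
        fn default() -> Self {
            Self {
                rate: Duration::from_secs(10),
                burst: 30,
            }
        }
    }

This would line up with config.rs filling the field via Default::default() in the hunk above.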

src/controllers/krate/publish.rs (+6 -1)

@@ -84,7 +84,12 @@ pub fn publish(req: &mut dyn Request) -> CargoResult<Response> {
     };
 
     let license_file = new_crate.license_file.as_ref().map(|s| &**s);
-    let krate = persist.create_or_update(&conn, license_file, user.id)?;
+    let krate = persist.create_or_update(
+        &conn,
+        license_file,
+        user.id,
+        Some(&app.config.publish_rate_limit),
+    )?;
 
     let owners = krate.owners(&conn)?;
     if user.rights(req.app(), &owners)? < Rights::Publish {

src/lib.rs (+2)

@@ -37,8 +37,10 @@ pub mod email;
 pub mod git;
 pub mod github;
 pub mod middleware;
+mod publish_rate_limit;
 pub mod render;
 pub mod schema;
+mod test_util;
 pub mod uploaders;
 pub mod util;

src/models/category.rs (+2 -3)

@@ -183,12 +183,11 @@ impl<'a> NewCategory<'a> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::test_util::pg_connection_no_transaction;
     use diesel::connection::SimpleConnection;
 
     fn pg_connection() -> PgConnection {
-        let database_url =
-            dotenv::var("TEST_DATABASE_URL").expect("TEST_DATABASE_URL must be set to run tests");
-        let conn = PgConnection::establish(&database_url).unwrap();
+        let conn = pg_connection_no_transaction();
         // These tests deadlock if run concurrently
         conn.batch_execute("BEGIN; LOCK categories IN ACCESS EXCLUSIVE MODE")
             .unwrap();

src/models/krate.rs (+5)

@@ -15,6 +15,7 @@ use crate::models::{
 use crate::views::{EncodableCrate, EncodableCrateLinks};
 
 use crate::models::helpers::with_count::*;
+use crate::publish_rate_limit::PublishRateLimit;
 use crate::schema::*;
 
 /// Hosts in this list are known to not be hosting documentation,
@@ -105,6 +106,7 @@ impl<'a> NewCrate<'a> {
         conn: &PgConnection,
         license_file: Option<&'a str>,
         uploader: i32,
+        rate_limit: Option<&PublishRateLimit>,
     ) -> CargoResult<Crate> {
         use diesel::update;
 
@@ -115,6 +117,9 @@
         // To avoid race conditions, we try to insert
         // first so we know whether to add an owner
         if let Some(krate) = self.save_new_crate(conn, uploader)? {
+            if let Some(rate_limit) = rate_limit {
+                rate_limit.check_rate_limit(uploader, conn)?;
+            }
             return Ok(krate);
         }
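As wired up here, check_rate_limit only runs after a brand-new crate row has been inserted, and per the commit message it currently panics rather than returning a proper error when the bucket is empty. A rough sketch of the decision that check has to make once the query returns the persisted token count, using hypothetical names (the real version does the take-and-refill in a single query and will eventually report a "wait this long" error):

    use std::time::Duration;

    /// Hypothetical result of taking a token: the persisted count (offset by 1,
    /// so 0 means the take just failed) plus the time since the last refill.
    struct TokenTake {
        tokens: i32,
        since_last_refill: Duration,
    }

    /// Illustrative check: Ok(()) if the publish is allowed, otherwise the time
    /// until the next token becomes available, for use in the error message.
    fn allowed(take: &TokenTake, rate: Duration) -> Result<(), Duration> {
        if take.tokens > 0 {
            Ok(())
        } else {
            // Remainder of the current refill interval.
            let into_interval = take.since_last_refill.as_millis() % rate.as_millis();
            Err(rate - Duration::from_millis(into_interval as u64))
        }
    }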

src/models/user.rs (+1 -1)

@@ -21,7 +21,7 @@ pub struct User {
     pub gh_id: i32,
 }
 
-#[derive(Insertable, Debug)]
+#[derive(Insertable, Debug, Default)]
 #[table_name = "users"]
 pub struct NewUser<'a> {
     pub gh_id: i32,
