Skip to content

Commit 6e6eeb6

Browse files
committed
Auto merge of #1606 - sgrif:sg-log-connection-pool-status, r=jtgeibel
Log the state of the connection pool once per minute On rare occasions we've had outages occur as a result of the connection pool refusing to return any connections. Shortly before this occurs, we start to see significant spikes in response times (likely because the pool has begun serving only a 1 or 2 connections and there's a large amount of contention). Right now we have limited capability to debug this. The only explanations that I can come up with are a bug in r2d2, or something is happening causing our worker threads to be killed in a way that does not run destructors, or something is causing threads to hang indefinitely. If one of those three options above is true, we should see `connections - idle_connections` trend towards zero over time, and diverge away from `in_flight_requests` over time (they aren't expected to be exactly equal, since some requests will never attempt to get a database connection, and we can have more requests than available connections during normal operations) This isn't going to be enough information to definitively figure out what's going on here, but it should hopefully give us *some* information to help figure out where to look next
2 parents 64688be + 90edc2f commit 6e6eeb6

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//! Log the current state of the database connection pool at most once per minute
2+
3+
use super::prelude::*;
4+
use crate::app::App;
5+
6+
use conduit::Request;
7+
use std::sync::{
8+
atomic::{AtomicUsize, Ordering},
9+
Arc, Mutex,
10+
};
11+
use std::time::{Duration, Instant};
12+
13+
#[derive(Clone)]
14+
pub(crate) struct LogConnectionPoolStatus {
15+
app: Arc<App>,
16+
last_log_time: Arc<Mutex<Instant>>,
17+
in_flight_requests: Arc<AtomicUsize>,
18+
}
19+
20+
impl LogConnectionPoolStatus {
21+
pub(crate) fn new(app: &Arc<App>) -> Self {
22+
Self {
23+
app: app.clone(),
24+
last_log_time: Arc::new(Mutex::new(Instant::now())),
25+
in_flight_requests: Arc::new(AtomicUsize::new(0)),
26+
}
27+
}
28+
}
29+
30+
impl Middleware for LogConnectionPoolStatus {
31+
fn before(&self, _: &mut dyn Request) -> Result<(), Box<dyn Error + Send>> {
32+
let mut last_log_time = self.last_log_time.lock().unwrap_or_else(|e| e.into_inner());
33+
let in_flight_requests = self.in_flight_requests.fetch_add(1, Ordering::SeqCst);
34+
if last_log_time.elapsed() >= Duration::from_secs(60) {
35+
*last_log_time = Instant::now();
36+
println!(
37+
"connection_pool_status=\"{:?}\" in_flight_requests={}",
38+
self.app.diesel_database.state(),
39+
in_flight_requests
40+
);
41+
}
42+
Ok(())
43+
}
44+
45+
fn after(
46+
&self,
47+
_: &mut dyn Request,
48+
res: Result<Response, Box<dyn Error + Send>>,
49+
) -> Result<Response, Box<dyn Error + Send>> {
50+
self.in_flight_requests.fetch_sub(1, Ordering::SeqCst);
51+
res
52+
}
53+
}

src/middleware/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub use self::current_user::CurrentUser;
99
pub use self::debug::*;
1010
pub use self::ember_index_rewrite::EmberIndexRewrite;
1111
pub use self::head::Head;
12+
use self::log_connection_pool_status::LogConnectionPoolStatus;
1213
pub use self::security_headers::SecurityHeaders;
1314
pub use self::static_or_continue::StaticOrContinue;
1415

@@ -19,6 +20,7 @@ mod debug;
1920
mod ember_index_rewrite;
2021
mod ensure_well_formed_500;
2122
mod head;
23+
mod log_connection_pool_status;
2224
mod log_request;
2325
mod require_user_agent;
2426
mod security_headers;
@@ -53,6 +55,10 @@ pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
5355
m.add(DebugRequest);
5456
}
5557

58+
if env::var_os("LOG_CONNECTION_POOL_STATUS").is_some() {
59+
m.add(LogConnectionPoolStatus::new(&app));
60+
}
61+
5662
m.add(ConditionalGet);
5763

5864
m.add(Cookie::new());

0 commit comments

Comments
 (0)