 import typing
 from typing import Any, DefaultDict, Iterator, List, Set
 
-from prometheus_client.core import Counter
+from prometheus_client.core import Counter, Gauge
 
 from twisted.internet import defer
 
@@ -29,6 +29,7 @@
     make_deferred_yieldable,
     run_in_background,
 )
+from synapse.metrics import count, LaterGauge
 from synapse.util import Clock
 
 if typing.TYPE_CHECKING:
@@ -51,6 +52,34 @@ def new_limiter() -> "_PerHostRatelimiter":
             str, "_PerHostRatelimiter"
         ] = collections.defaultdict(new_limiter)
 
+        # We track the number of affected hosts per time-period so we can
+        # differentiate one really noisy homeserver from a general
+        # ratelimit tuning problem across the federation.
+        LaterGauge(
+            "synapse_rate_limit_sleep_affected_hosts",
+            "Number of hosts that had requests put to sleep",
+            [],
+            lambda: count(
+                bool,
+                [
+                    ratelimiter.should_sleep()
+                    for ratelimiter in self.ratelimiters.values()
+                ],
+            ),
+        )
+        LaterGauge(
+            "synapse_rate_limit_reject_affected_hosts",
+            "Number of hosts that had requests rejected",
+            [],
+            lambda: count(
+                bool,
+                [
+                    ratelimiter.should_reject()
+                    for ratelimiter in self.ratelimiters.values()
+                ],
+            ),
+        )
+
     def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]":
         """Used to ratelimit an incoming request from a given host
 
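For context on the gauge callbacks above: each one simply counts how many per-host ratelimiters currently answer `True` to `should_sleep()` / `should_reject()`, assuming `count(func, iterable)` counts the elements for which `func` returns a truthy value. A minimal standalone sketch of that counting idiom, using only the standard library (`StubLimiter` and `limiters` are illustrative stand-ins, not Synapse code):

```python
# Illustrative stand-in for a per-host ratelimiter; not Synapse code.
class StubLimiter:
    def __init__(self, recent_requests: int, sleep_limit: int) -> None:
        self.recent_requests = recent_requests
        self.sleep_limit = sleep_limit

    def should_sleep(self) -> bool:
        # Same shape as the predicate added in this change: sleep once the
        # number of recent requests exceeds the configured sleep limit.
        return self.recent_requests > self.sleep_limit


limiters = [StubLimiter(2, 10), StubLimiter(25, 10), StubLimiter(11, 10)]

# Equivalent in effect to count(bool, [l.should_sleep() for l in limiters]):
# booleans sum as 0/1, so this is the number of "affected" hosts.
sleep_affected_hosts = sum(l.should_sleep() for l in limiters)
print(sleep_affected_hosts)  # -> 2
```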
@@ -116,6 +145,17 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]":
         finally:
             self._on_exit(request_id)
 
+    def should_reject(self):
+        """
+        Reject the request if we already have too many queued up (either
+        sleeping or in the ready queue).
+        """
+        queue_size = len(self.ready_request_queue) + len(self.sleeping_requests)
+        return queue_size > self.reject_limit
+
+    def should_sleep(self):
+        return len(self.request_times) > self.sleep_limit
+
     def _on_enter(self, request_id: object) -> "defer.Deferred[None]":
         time_now = self.clock.time_msec()
 
@@ -126,8 +166,7 @@ def _on_enter(self, request_id: object) -> "defer.Deferred[None]":
 
         # reject the request if we already have too many queued up (either
         # sleeping or in the ready queue).
-        queue_size = len(self.ready_request_queue) + len(self.sleeping_requests)
-        if queue_size > self.reject_limit:
+        if self.should_reject():
             logger.debug("Ratelimiter(%s): rejecting request", self.host)
             rate_limit_reject_counter.inc()
             raise LimitExceededError(
@@ -157,7 +196,7 @@ def queue_request() -> "defer.Deferred[None]":
             len(self.request_times),
         )
 
-        if len(self.request_times) > self.sleep_limit:
+        if self.should_sleep():
            logger.debug(
                "Ratelimiter(%s) [%s]: sleeping request for %f sec",
                self.host,
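Once the class is instantiated, the two new gauges should be collectable like any other process metric. A hedged sketch of how they might be read back, e.g. in a test, assuming the `LaterGauge` instances register with `prometheus_client`'s default `REGISTRY` (the `affected_host_counts` helper below is hypothetical, not part of this change):

```python
from typing import Dict, Optional

from prometheus_client import REGISTRY


def affected_host_counts() -> Dict[str, Optional[float]]:
    # get_sample_value returns None if the metric has not been registered yet.
    return {
        "sleeping": REGISTRY.get_sample_value(
            "synapse_rate_limit_sleep_affected_hosts"
        ),
        "rejecting": REGISTRY.get_sample_value(
            "synapse_rate_limit_reject_affected_hosts"
        ),
    }
```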