Skip to content

Commit 40b1ecb

Browse files
sandy2008claudefriedrichg
authored
[BUGFIX] Ingester: fix inflight query counter leak on resource rejection (#7539)
* Fix ingester inflight query counter leak Signed-off-by: Sandy Chen <Yuxuan.Chen@morganstanley.com> * Address review feedback on inflight query leak fix - Add clarifying comment before the post-rejection QueryStream block, per inline review nit. - Replace `i.resourceBasedLimiter = nil` with mutating the underlying mock monitor's heap utilization, so the same live limiter admits the second query. The regression test now exercises the limiter machinery on the retry instead of bypassing it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Signed-off-by: Sandy Chen <Yuxuan.Chen@morganstanley.com> --------- Signed-off-by: Sandy Chen <Yuxuan.Chen@morganstanley.com> Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-authored-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com>
1 parent 40a27ad commit 40b1ecb

3 files changed

Lines changed: 14 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
* [BUGFIX] Ingester: Close TSDB when compaction fails during `createTSDB`, preventing resource leaks (file descriptors, mmap handles) that could lead to ingester instability. #7560
4949
* [BUGFIX] Tenant Federation: Fix result cache returning stale data after a new tenant is added when `-tenant-federation.regex-matcher-enabled=true`. The resolved tenant set is now hashed and included in the cache key so that any change to the matched tenant list automatically invalidates cached entries. Non-regex users are unaffected. #7562
5050
* [BUGFIX] Tenant Federation: Fix regex resolver clearing known users list when user scan fails. #7534
51+
* [BUGFIX] Ingester: Fix inflight query counter leak when resource-based query protection rejects a request. #7539
5152
* [BUGFIX] Ingester: Release the TSDB appender on every early-return path in `Push` (e.g. out-of-order label set) by deferring `Rollback`. Previously such requests leaked TSDB head series references, mmap'd chunks and pending state per request, causing the `cortex_ingester_tsdb_head_active_appenders` gauge to grow unbounded. #7528
5253
* [BUGFIX] Ring: Fix ring token conflict resolution only applied to updated instance and make constantly token conflict check during instance observe period.
5354
* [BUGFIX] Distributor: Fix a panic (`slice bounds out of range`) in the stream push path when the context deadline expires while the worker goroutine is still marshalling a `WriteRequest`. #7541

pkg/ingester/ingester.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2578,15 +2578,15 @@ func (i *Ingester) trackInflightQueryRequest() (func(), error) {
25782578
}
25792579
}
25802580

2581-
i.maxInflightQueryRequests.Track(i.inflightQueryRequests.Inc())
2582-
25832581
if i.resourceBasedLimiter != nil {
25842582
if err := i.resourceBasedLimiter.AcceptNewRequest(); err != nil {
25852583
level.Warn(i.logger).Log("msg", "failed to accept request", "err", err)
25862584
return nil, limiter.ErrResourceLimitReached
25872585
}
25882586
}
25892587

2588+
i.maxInflightQueryRequests.Track(i.inflightQueryRequests.Inc())
2589+
25902590
return func() {
25912591
i.inflightQueryRequests.Dec()
25922592
}, nil

pkg/ingester/ingester_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3533,7 +3533,9 @@ func Test_Ingester_Query_ResourceThresholdBreached(t *testing.T) {
35333533
{labels.FromStrings("__name__", "test_1", "route", "get_user", "status", "200"), 1, 100000},
35343534
}
35353535

3536-
i, err := prepareIngesterWithBlocksStorage(t, defaultIngesterTestConfig(t), prometheus.NewRegistry())
3536+
cfg := defaultIngesterTestConfig(t)
3537+
cfg.DefaultLimits.MaxInflightQueryRequests = 1
3538+
i, err := prepareIngesterWithBlocksStorage(t, cfg, prometheus.NewRegistry())
35373539
require.NoError(t, err)
35383540
require.NoError(t, services.StartAndAwaitRunning(context.Background(), i))
35393541
defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck
@@ -3542,7 +3544,8 @@ func Test_Ingester_Query_ResourceThresholdBreached(t *testing.T) {
35423544
resource.CPU: 0.5,
35433545
resource.Heap: 0.5,
35443546
}
3545-
i.resourceBasedLimiter, err = limiter.NewResourceBasedLimiter(&mockResourceMonitor{cpu: 0.4, heap: 0.6}, limits, nil, "ingester")
3547+
monitor := &mockResourceMonitor{cpu: 0.4, heap: 0.6}
3548+
i.resourceBasedLimiter, err = limiter.NewResourceBasedLimiter(monitor, limits, nil, "ingester")
35463549
require.NoError(t, err)
35473550

35483551
// Wait until it's ACTIVE
@@ -3566,6 +3569,12 @@ func Test_Ingester_Query_ResourceThresholdBreached(t *testing.T) {
35663569

35673570
// Expected error from isRetryableError in blocks_store_queryable.go
35683571
require.ErrorIs(t, err, limiter.ErrResourceLimitReached)
3572+
require.Equal(t, int64(0), i.inflightQueryRequests.Load())
3573+
3574+
// Verify that a query not blocked by the limiter still succeeds after the rejected request.
3575+
monitor.heap = 0.4
3576+
s = &mockQueryStreamServer{ctx: ctx}
3577+
require.NoError(t, i.QueryStream(rreq, s))
35693578
}
35703579

35713580
func TestIngester_LabelValues_ShouldNotCreateTSDBIfDoesNotExists(t *testing.T) {

0 commit comments

Comments
 (0)