Skip to content

Consider query select time in max query length check #5808

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* [ENHANCEMENT] AlertManager: Retrying AlertManager Delete Silence on error #5794
* [ENHANCEMENT] Ingester: Add new ingester metric `cortex_ingester_max_inflight_query_requests`. #5798
* [ENHANCEMENT] Query: Added `query_storage_wall_time` to Query Frontend and Ruler query stats log for wall time spent on fetching data from storage. Query evaluation is not included. #5799
* [ENHANCEMENT] Query: Added additional max query length check at Query Frontend and Ruler. Added `-querier.ignore-max-query-length` flag to disable max query length check at Querier. #5808
* [BUGFIX] Distributor: Do not use label with empty values for sharding #5717
* [BUGFIX] Query Frontend: queries with negative offset should check whether it is cacheable or not. #5719
* [BUGFIX] Redis Cache: pass `cache_size` config correctly. #5734
Expand Down
6 changes: 6 additions & 0 deletions docs/blocks-storage/querier.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ querier:
# engine.
# CLI flag: -querier.thanos-engine
[thanos_engine: <boolean> | default = false]

# If enabled, ignore max query length check at Querier select method. Users
# can choose to ignore it since the validation can be done before Querier
# evaluation like at Query Frontend or Ruler.
# CLI flag: -querier.ignore-max-query-length
[ignore_max_query_length: <boolean> | default = false]
```

### `blocks_storage_config`
Expand Down
6 changes: 6 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3714,6 +3714,12 @@ store_gateway_client:
# engine.
# CLI flag: -querier.thanos-engine
[thanos_engine: <boolean> | default = false]

# If enabled, ignore max query length check at Querier select method. Users can
# choose to ignore it since the validation can be done before Querier evaluation
# like at Query Frontend or Ruler.
# CLI flag: -querier.ignore-max-query-length
[ignore_max_query_length: <boolean> | default = false]
```

### `query_frontend_config`
Expand Down
4 changes: 3 additions & 1 deletion pkg/cortex/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,12 +457,13 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
queryAnalyzer,
prometheusCodec,
shardedPrometheusCodec,
t.Cfg.Querier.LookbackDelta,
)
if err != nil {
return nil, err
}

instantQueryMiddlewares, err := instantquery.Middlewares(util_log.Logger, t.Overrides, queryAnalyzer)
instantQueryMiddlewares, err := instantquery.Middlewares(util_log.Logger, t.Overrides, queryAnalyzer, t.Cfg.Querier.LookbackDelta)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -548,6 +549,7 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
return nil, nil
}

t.Cfg.Ruler.LookbackDelta = t.Cfg.Querier.LookbackDelta
t.Cfg.Ruler.Ring.ListenPort = t.Cfg.Server.GRPCListenPort
metrics := ruler.NewRuleEvalMetrics(t.Cfg.Ruler, prometheus.DefaultRegisterer)

Expand Down
35 changes: 22 additions & 13 deletions pkg/querier/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ type Config struct {
// Experimental. Use https://github.com/thanos-io/promql-engine rather than
// the Prometheus query engine.
ThanosEngine bool `yaml:"thanos_engine"`

// Ignore max query length check at Querier.
IgnoreMaxQueryLength bool `yaml:"ignore_max_query_length"`
}

var (
Expand Down Expand Up @@ -119,6 +122,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.ShuffleShardingIngestersLookbackPeriod, "querier.shuffle-sharding-ingesters-lookback-period", 0, "When distributor's sharding strategy is shuffle-sharding and this setting is > 0, queriers fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since 'now - lookback period'. The lookback period should be greater or equal than the configured 'query store after' and 'query ingesters within'. If this setting is 0, queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).")
f.BoolVar(&cfg.ThanosEngine, "querier.thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.")
f.Int64Var(&cfg.MaxSubQuerySteps, "querier.max-subquery-steps", 0, "Max number of steps allowed for every subquery expression in query. Number of steps is calculated using subquery range / step. A value > 0 enables it.")
f.BoolVar(&cfg.IgnoreMaxQueryLength, "querier.ignore-max-query-length", false, "If enabled, ignore max query length check at Querier select method. Users can choose to ignore it since the validation can be done before Querier evaluation like at Query Frontend or Ruler.")
}

// Validate the config
Expand Down Expand Up @@ -256,16 +260,17 @@ type limiterHolder struct {
func NewQueryable(distributor QueryableWithFilter, stores []QueryableWithFilter, chunkIterFn chunkIteratorFunc, cfg Config, limits *validation.Overrides) storage.Queryable {
return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) {
q := querier{
now: time.Now(),
mint: mint,
maxt: maxt,
chunkIterFn: chunkIterFn,
limits: limits,
maxQueryIntoFuture: cfg.MaxQueryIntoFuture,
queryStoreForLabels: cfg.QueryStoreForLabels,
distributor: distributor,
stores: stores,
limiterHolder: &limiterHolder{},
now: time.Now(),
mint: mint,
maxt: maxt,
chunkIterFn: chunkIterFn,
limits: limits,
maxQueryIntoFuture: cfg.MaxQueryIntoFuture,
queryStoreForLabels: cfg.QueryStoreForLabels,
ignoreMaxQueryLength: cfg.IgnoreMaxQueryLength,
distributor: distributor,
stores: stores,
limiterHolder: &limiterHolder{},
}

return q, nil
Expand All @@ -283,6 +288,8 @@ type querier struct {
distributor QueryableWithFilter
stores []QueryableWithFilter
limiterHolder *limiterHolder

ignoreMaxQueryLength bool
}

func (q querier) setupFromCtx(ctx context.Context) (context.Context, *querier_stats.QueryStats, string, int64, int64, storage.Querier, []storage.Querier, error) {
Expand Down Expand Up @@ -397,9 +404,11 @@ func (q querier) Select(ctx context.Context, sortSeries bool, sp *storage.Select
// Validate query time range. This validation should be done only for instant / range queries and
// NOT for metadata queries (series, labels) because the query-frontend doesn't support splitting
// of such queries.
if maxQueryLength := q.limits.MaxQueryLength(userID); maxQueryLength > 0 && endTime.Sub(startTime) > maxQueryLength {
limitErr := validation.LimitError(fmt.Sprintf(validation.ErrQueryTooLong, endTime.Sub(startTime), maxQueryLength))
return storage.ErrSeriesSet(limitErr)
if !q.ignoreMaxQueryLength {
if maxQueryLength := q.limits.MaxQueryLength(userID); maxQueryLength > 0 && endTime.Sub(startTime) > maxQueryLength {
limitErr := validation.LimitError(fmt.Sprintf(validation.ErrQueryTooLong, endTime.Sub(startTime), maxQueryLength))
return storage.ErrSeriesSet(limitErr)
}
}

if len(queriers) == 1 {
Expand Down
11 changes: 11 additions & 0 deletions pkg/querier/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,9 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLength(t *testing.T) {
queryStartTime time.Time
queryEndTime time.Time
expected error

// If enabled, skip max query length check at Querier.
ignoreMaxQueryLength bool
}{
"should allow query on short time range and rate time window close to the limit": {
query: "rate(foo[29d])",
Expand All @@ -858,6 +861,13 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLength(t *testing.T) {
queryEndTime: time.Now(),
expected: errors.New("expanding series: the query time range exceeds the limit (query length: 721h1m0s, limit: 720h0m0s)"),
},
"max query length check ignored, invalid query is still allowed": {
query: "rate(foo[1m])",
queryStartTime: time.Now().Add(-maxQueryLength).Add(-time.Hour),
queryEndTime: time.Now(),
expected: nil,
ignoreMaxQueryLength: true,
},
}

opts := promql.EngineOpts{
Expand All @@ -873,6 +883,7 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLength(t *testing.T) {
flagext.DefaultValues(&cfg)
// Disable active query tracker to avoid mmap error.
cfg.ActiveQueryTrackerDir = ""
cfg.IgnoreMaxQueryLength = testData.ignoreMaxQueryLength

limits := DefaultLimitsConfig()
limits.MaxQueryLength = model.Duration(maxQueryLength)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package instantquery

import (
"time"

"github.com/go-kit/log"
"github.com/thanos-io/thanos/pkg/querysharding"

Expand All @@ -11,9 +13,11 @@ func Middlewares(
log log.Logger,
limits tripperware.Limits,
queryAnalyzer querysharding.Analyzer,
lookbackDelta time.Duration,
) ([]tripperware.Middleware, error) {
var m []tripperware.Middleware

m = append(m, tripperware.ShardByMiddleware(log, limits, InstantQueryCodec, queryAnalyzer))
m := []tripperware.Middleware{
NewLimitsMiddleware(limits, lookbackDelta),
tripperware.ShardByMiddleware(log, limits, InstantQueryCodec, queryAnalyzer),
}
return m, nil
}
64 changes: 64 additions & 0 deletions pkg/querier/tripperware/instantquery/limits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package instantquery

import (
"context"
"net/http"
"time"

"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/weaveworks/common/httpgrpc"

"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/tenant"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/spanlogger"
"github.com/cortexproject/cortex/pkg/util/validation"
)

// limitsMiddleware is a tripperware.Handler that checks the max query length
// limit of a request before delegating to the next handler.
type limitsMiddleware struct {
// Limits supplies the per-tenant limits; Do reads MaxQueryLength from it.
tripperware.Limits
// next is the handler invoked when the request is not rejected.
next tripperware.Handler

// lookbackDelta is passed as the EvalStmt LookbackDelta when Do computes the
// time range covered by the query's selectors.
lookbackDelta time.Duration
}

// NewLimitsMiddleware builds a Middleware that enforces query limits. Every
// handler it wraps is placed behind a limitsMiddleware configured with the
// given limits and lookback delta.
func NewLimitsMiddleware(l tripperware.Limits, lookbackDelta time.Duration) tripperware.Middleware {
	wrap := func(next tripperware.Handler) tripperware.Handler {
		return limitsMiddleware{
			Limits:        l,
			next:          next,
			lookbackDelta: lookbackDelta,
		}
	}
	return tripperware.MiddlewareFunc(wrap)
}

// Do enforces the max query length limit and, when the request is not
// rejected, forwards it to the next handler.
//
// The limit is the value returned by
// validation.SmallestPositiveNonZeroDurationPerTenant for the tenants resolved
// from ctx; when that value is not positive the check is skipped. The query is
// parsed and the time range spanned by all of its selectors is computed with
// both start and end fixed at timestamp 0, so only the width of the range
// (range selectors, offsets, lookback delta) is compared against the limit.
//
// It returns an HTTP 400 error when the tenant IDs cannot be resolved or when
// the computed range exceeds the limit. A query that fails to parse is passed
// through unchanged so that the parsing error is reported downstream.
func (l limitsMiddleware) Do(ctx context.Context, r tripperware.Request) (tripperware.Response, error) {
	log, ctx := spanlogger.New(ctx, "limits")
	defer log.Finish()

	tenantIDs, err := tenant.TenantIDs(ctx)
	if err != nil {
		// Pass the message through an explicit "%s" verb: using the error text
		// itself as the format string would garble any '%' it contains.
		return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
	}

	// Enforce the max query length.
	maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLength)
	if maxQueryLength <= 0 {
		return l.next.Do(ctx, r)
	}

	expr, err := parser.ParseExpr(r.GetQuery())
	if err != nil {
		// Let the Querier propagate the parsing error.
		return l.next.Do(ctx, r)
	}

	// Enforce query length across all selectors in the query. The locals are
	// named minTime/maxTime to avoid shadowing the min/max builtins.
	stmt := &parser.EvalStmt{
		Expr:          expr,
		Start:         util.TimeFromMillis(0),
		End:           util.TimeFromMillis(0),
		LookbackDelta: l.lookbackDelta,
	}
	minTime, maxTime := promql.FindMinMaxTime(stmt)
	diff := util.TimeFromMillis(maxTime).Sub(util.TimeFromMillis(minTime))
	if diff > maxQueryLength {
		return nil, httpgrpc.Errorf(http.StatusBadRequest, validation.ErrQueryTooLong, diff, maxQueryLength)
	}

	return l.next.Do(ctx, r)
}
109 changes: 109 additions & 0 deletions pkg/querier/tripperware/instantquery/limits_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package instantquery

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/weaveworks/common/user"

"github.com/cortexproject/cortex/pkg/querier/tripperware"
"github.com/cortexproject/cortex/pkg/util/validation"
)

func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
t.Parallel()
const (
thirtyDays = 30 * 24 * time.Hour
)

tests := map[string]struct {
maxQueryLength time.Duration
query string
expectedErr string
}{
"should skip validation if max length is disabled": {
maxQueryLength: 0,
},
"even though failed to parse expression, should return no error since request will pass to next middleware": {
query: `up[`,
maxQueryLength: thirtyDays,
},
"should succeed on a query not exceeding time range": {
query: `up`,
maxQueryLength: thirtyDays,
},
"should succeed on a query not exceeding time range2": {
query: `up[29d]`,
maxQueryLength: thirtyDays,
},
"should succeed on a query not exceeding time range3": {
query: `rate(up[29d]) + rate(test[29d])`,
maxQueryLength: thirtyDays,
},
"should fail on a query exceeding time range": {
query: `rate(up[31d])`,
maxQueryLength: thirtyDays,
expectedErr: "the query time range exceeds the limit",
},
"should fail on a query exceeding time range, work for multiple selects": {
query: `rate(up[20d]) + rate(up[20d] offset 20d)`,
maxQueryLength: thirtyDays,
expectedErr: "the query time range exceeds the limit",
},
}

for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
req := &PrometheusRequest{Query: testData.query}

limits := &mockLimits{maxQueryLength: testData.maxQueryLength}
middleware := NewLimitsMiddleware(limits, 5*time.Minute)

innerRes := NewEmptyPrometheusInstantQueryResponse()
inner := &mockHandler{}
inner.On("Do", mock.Anything, mock.Anything).Return(innerRes, nil)

ctx := user.InjectOrgID(context.Background(), "test")
outer := middleware.Wrap(inner)
res, err := outer.Do(ctx, req)

if testData.expectedErr != "" {
require.Error(t, err)
assert.Contains(t, err.Error(), testData.expectedErr)
assert.Nil(t, res)
assert.Len(t, inner.Calls, 0)
} else {
// We expect the response returned by the inner handler.
require.NoError(t, err)
assert.Same(t, innerRes, res)

// The time range of the request passed to the inner handler should have not been manipulated.
require.Len(t, inner.Calls, 1)
}
})
}
}

// mockLimits is a test double for the limits handed to NewLimitsMiddleware. It
// embeds validation.Overrides and replaces MaxQueryLength with a fixed value.
type mockLimits struct {
validation.Overrides
// maxQueryLength is the value MaxQueryLength returns for any argument.
maxQueryLength time.Duration
}

// MaxQueryLength returns the configured maxQueryLength; the string argument
// is ignored.
func (m mockLimits) MaxQueryLength(_ string) time.Duration {
	return m.maxQueryLength
}

// mockHandler is a testify-based mock handler; its Do method records each call
// and returns the values configured on the embedded mock.Mock.
type mockHandler struct {
mock.Mock
}

// Do records the call on the embedded mock and returns the configured values:
// the first as a tripperware.Response and the second as the error.
func (m *mockHandler) Do(ctx context.Context, req tripperware.Request) (tripperware.Response, error) {
	recorded := m.Called(ctx, req)
	resp := recorded.Get(0).(tripperware.Response)
	return resp, recorded.Error(1)
}
Loading