diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cf3e1ed8d..676ba2fc8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * [FEATURE] Update prometheus alertmanager version to v0.28.0 and add new integration msteamsv2, jira, and rocketchat. #6590 * [FEATURE] Ingester: Add a `-ingester.enable-ooo-native-histograms` flag to enable out-of-order native histogram ingestion per tenant. It only takes effect when `-blocks-storage.tsdb.enable-native-histograms=true` and `-ingester.out-of-order-time-window` > 0. It is applied after the restart if it is changed at runtime through the runtime config. #6626 * [ENHANCEMENT] Querier: limit label APIs to query only ingesters if `start` param is not been specified. #6618 +* [ENHANCEMENT] Ruler: Add rule information (group name, namespace, name, and kind) to the query parameters sent to the Query Frontend so that rule information is included in the query stats logs. #6539 * [ENHANCEMENT] Alertmanager: Add new limits `-alertmanager.max-silences-count` and `-alertmanager.max-silences-size-bytes` for limiting silences per tenant. #6605 * [ENHANCEMENT] Update prometheus version to v3.1.0. #6583 * [ENHANCEMENT] Add `compactor.auto-forget-delay` for compactor to auto forget compactors after X minutes without heartbeat. #6533 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 1b9a03b47b..1bb8a1226e 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4854,7 +4854,9 @@ ring: [disabled_tenants: | default = ""] # Report query statistics for ruler queries to complete as a per user metric and -# as an info level log message. +# as an info level log message. It works only when the -ruler.frontend-address +# is not configured. When -ruler.frontend-address enabled, the Query Frontend +# tracks query statistics logs and metrics. 
# CLI flag: -ruler.query-stats-enabled [query_stats_enabled: | default = false] diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index eb34ee02e3..a856c5fd63 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -184,7 +184,20 @@ func EngineQueryFunc(engine promql.QueryEngine, frontendClient *frontendClient, } if frontendClient != nil { - v, err := frontendClient.InstantQuery(ctx, qs, t) + // query parameters sent to the Query Frontend to leave rule information logs on query stats + queryParams := map[string]string{} + + if origin := ctx.Value(promql.QueryOrigin{}); origin != nil { + queryLabels := origin.(map[string]interface{}) + rgMap := queryLabels["ruleGroup"].(map[string]string) + queryParams["rule_group"] = rgMap["name"] + queryParams["rule_namespace"] = rgMap["file"] + } + ruleDetail := rules.FromOriginContext(ctx) + queryParams["rule"] = ruleDetail.Name + queryParams["rule_kind"] = ruleDetail.Kind + + v, err := frontendClient.InstantQuery(ctx, qs, t, queryParams) if err != nil { return nil, err } @@ -333,7 +346,9 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi totalWrites := evalMetrics.TotalWritesVec.WithLabelValues(userID) failedWrites := evalMetrics.FailedWritesVec.WithLabelValues(userID) - if cfg.FrontendAddress != "" { + shouldEvalFromQFE := cfg.FrontendAddress != "" + if shouldEvalFromQFE { + // evaluate rules via Query-Frontend c, err := frontendPool.GetClientFor(cfg.FrontendAddress) if err != nil { return nil, err @@ -343,7 +358,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi var queryFunc rules.QueryFunc engineQueryFunc := EngineQueryFunc(engine, client, q, overrides, userID, cfg.LookbackDelta) metricsQueryFunc := MetricsQueryFunc(engineQueryFunc, totalQueries, failedQueries) - if cfg.EnableQueryStats { + if cfg.EnableQueryStats && !shouldEvalFromQFE { queryFunc = RecordAndReportRuleQueryMetrics(metricsQueryFunc, userID, evalMetrics, logger) } else { queryFunc 
= metricsQueryFunc diff --git a/pkg/ruler/frontend_client.go b/pkg/ruler/frontend_client.go index dff0fca11c..7cd78bf571 100644 --- a/pkg/ruler/frontend_client.go +++ b/pkg/ruler/frontend_client.go @@ -49,12 +49,17 @@ func NewFrontendClient(client httpgrpc.HTTPClient, timeout time.Duration, promet } } -func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Time) (*httpgrpc.HTTPRequest, error) { +func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Time, queryParams map[string]string) (*httpgrpc.HTTPRequest, error) { args := make(url.Values) args.Set("query", qs) if !ts.IsZero() { args.Set("time", ts.Format(time.RFC3339Nano)) } + // set query parameters sent to the Query Frontend to leave rule information logs on query stats + for k, v := range queryParams { + args.Set(k, v) + } + body := []byte(args.Encode()) //lint:ignore faillint wrapper around upstream method @@ -87,11 +92,11 @@ func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Tim return req, nil } -func (p *FrontendClient) InstantQuery(ctx context.Context, qs string, t time.Time) (promql.Vector, error) { +func (p *FrontendClient) InstantQuery(ctx context.Context, qs string, t time.Time, queryParams map[string]string) (promql.Vector, error) { log, ctx := spanlogger.New(ctx, "FrontendClient.InstantQuery") defer log.Span.Finish() - req, err := p.makeRequest(ctx, qs, t) + req, err := p.makeRequest(ctx, qs, t, queryParams) if err != nil { level.Error(log).Log("err", err, "query", qs) return nil, err diff --git a/pkg/ruler/frontend_client_test.go b/pkg/ruler/frontend_client_test.go index d46df39738..f329643527 100644 --- a/pkg/ruler/frontend_client_test.go +++ b/pkg/ruler/frontend_client_test.go @@ -32,7 +32,7 @@ func TestTimeout(t *testing.T) { ctx := context.Background() ctx = user.InjectOrgID(ctx, "userID") frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") - _, err := 
frontendClient.InstantQuery(ctx, "query", time.Now()) + _, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil) require.Equal(t, context.DeadlineExceeded, err) } @@ -41,7 +41,7 @@ func TestNoOrgId(t *testing.T) { return nil, nil } frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") - _, err := frontendClient.InstantQuery(context.Background(), "query", time.Now()) + _, err := frontendClient.InstantQuery(context.Background(), "query", time.Now(), nil) require.Equal(t, user.ErrNoOrgID, err) } @@ -152,7 +152,7 @@ func TestInstantQueryJsonCodec(t *testing.T) { ctx := context.Background() ctx = user.InjectOrgID(ctx, "userID") frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") - vector, err := frontendClient.InstantQuery(ctx, "query", time.Now()) + vector, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil) require.Equal(t, test.expected, vector) require.Equal(t, test.expectedErr, err) }) @@ -301,7 +301,7 @@ func TestInstantQueryProtoCodec(t *testing.T) { ctx := context.Background() ctx = user.InjectOrgID(ctx, "userID") frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "protobuf") - vector, err := frontendClient.InstantQuery(ctx, "query", time.Now()) + vector, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil) require.Equal(t, test.expected, vector) require.Equal(t, test.expectedErr, err) }) diff --git a/pkg/ruler/manager_metrics.go b/pkg/ruler/manager_metrics.go index 93acdc26b1..75d3ad124e 100644 --- a/pkg/ruler/manager_metrics.go +++ b/pkg/ruler/manager_metrics.go @@ -255,7 +255,7 @@ func NewRuleEvalMetrics(cfg Config, reg prometheus.Registerer) *RuleEvalMetrics Help: "Number of failed queries by ruler.", }, []string{"user"}), } - if cfg.EnableQueryStats { + if cfg.EnableQueryStats && cfg.FrontendAddress == "" { m.RulerQuerySeconds = 
promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ruler_query_seconds_total", Help: "Total amount of wall clock time spent processing queries by the ruler.", diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 067c7a4f59..fd5c7e173d 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -249,7 +249,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "ruler.enabled-tenants", "Comma separated list of tenants whose rules this ruler can evaluate. If specified, only these tenants will be handled by ruler, otherwise this ruler can process rules from all tenants. Subject to sharding.") f.Var(&cfg.DisabledTenants, "ruler.disabled-tenants", "Comma separated list of tenants whose rules this ruler cannot evaluate. If specified, a ruler that would normally pick the specified tenant(s) for processing will ignore them instead. Subject to sharding.") - f.BoolVar(&cfg.EnableQueryStats, "ruler.query-stats-enabled", false, "Report query statistics for ruler queries to complete as a per user metric and as an info level log message.") + f.BoolVar(&cfg.EnableQueryStats, "ruler.query-stats-enabled", false, "Report query statistics for ruler queries to complete as a per user metric and as an info level log message. It works only when the -ruler.frontend-address is not configured. When -ruler.frontend-address enabled, the Query Frontend tracks query statistics logs and metrics.") f.BoolVar(&cfg.DisableRuleGroupLabel, "ruler.disable-rule-group-label", false, "Disable the rule_group label on exported metrics") f.BoolVar(&cfg.EnableHAEvaluation, "ruler.enable-ha-evaluation", false, "Enable high availability")