Skip to content

Commit 67837c0

Browse files
juliusv authored and aknuds1 committed
Add support for Prometheus 2.0 rule format
The rule format to use is now set binary-wide via the `-ruler.rule-format-version` and `-configs.rule-format-version` flags, which still default to the Prometheus 1.x rule format. There's some trickiness here regarding what data type to return from parsing, regarding the ability to track alert states, and not being able to create final rule groups yet. That's laid out in the comment above RulesConfig.Parse(). Fixes cortexproject/cortex#622
1 parent b02d032 commit 67837c0

File tree

4 files changed

+158
-60
lines changed

4 files changed

+158
-60
lines changed

pkg/ruler/api.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,26 @@ import (
1717

1818
// API implements the configs api.
1919
type API struct {
20-
db db.RulesDB
20+
db db.RulesDB
21+
ruleFormatVersion configs.RuleFormatVersion
2122
http.Handler
2223
}
2324

2425
// NewAPIFromConfig makes a new API from our database config.
25-
func NewAPIFromConfig(cfg db.Config) (*API, error) {
26+
func NewAPIFromConfig(cfg db.Config, rfv configs.RuleFormatVersion) (*API, error) {
2627
db, err := db.NewRulesDB(cfg)
2728
if err != nil {
2829
return nil, err
2930
}
30-
return NewAPI(db), nil
31+
return NewAPI(db, rfv), nil
3132
}
3233

3334
// NewAPI creates a new API.
34-
func NewAPI(db db.RulesDB) *API {
35-
a := &API{db: db}
35+
func NewAPI(db db.RulesDB, rfv configs.RuleFormatVersion) *API {
36+
a := &API{
37+
db: db,
38+
ruleFormatVersion: rfv,
39+
}
3640
r := mux.NewRouter()
3741
a.RegisterRoutes(r)
3842
a.Handler = r
@@ -98,11 +102,21 @@ func (a *API) casConfig(w http.ResponseWriter, r *http.Request) {
98102
http.Error(w, err.Error(), http.StatusBadRequest)
99103
return
100104
}
101-
if _, err := updateReq.NewConfig.Parse(); err != nil {
105+
106+
switch a.ruleFormatVersion {
107+
case configs.RuleFormatV1:
108+
_, err = updateReq.NewConfig.ParseV1()
109+
case configs.RuleFormatV2:
110+
_, err = updateReq.NewConfig.ParseV2()
111+
default:
112+
panic("unknown rule format")
113+
}
114+
if err != nil {
102115
level.Error(logger).Log("msg", "invalid rules", "err", err)
103116
http.Error(w, fmt.Sprintf("Invalid rules: %v", err), http.StatusBadRequest)
104117
return
105118
}
119+
106120
updated, err := a.db.SetRulesConfig(userID, updateReq.OldConfig, updateReq.NewConfig)
107121
if err != nil {
108122
level.Error(logger).Log("msg", "error storing config", "err", err)

pkg/ruler/api_test.go

Lines changed: 102 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ var (
3333
// setup sets up the environment for the tests.
3434
func setup(t *testing.T) {
3535
database = dbtest.Setup(t)
36-
app = NewAPI(database)
36+
app = NewAPI(database, configs.RuleFormatV2)
3737
counter = 0
3838
privateAPI = dbStore{db: database}
3939
}
@@ -75,22 +75,46 @@ func makeUserID() string {
7575
}
7676

7777
// makeRulerConfig makes an arbitrary ruler config
78-
func makeRulerConfig() configs.RulesConfig {
79-
return configs.RulesConfig(map[string]string{
80-
"filename.rules": makeString(`
78+
func makeRulerConfig(rfv configs.RuleFormatVersion) configs.RulesConfig {
79+
switch rfv {
80+
case configs.RuleFormatV1:
81+
return configs.RulesConfig(map[string]string{
82+
"filename.rules": makeString(`
83+
# Config no. %d.
84+
ALERT ScrapeFailed
85+
IF up != 1
86+
FOR 10m
87+
LABELS { severity="warning" }
88+
ANNOTATIONS {
89+
summary = "Scrape of {{$labels.job}} (pod: {{$labels.instance}}) failed.",
90+
description = "Prometheus cannot reach the /metrics page on the {{$labels.instance}} pod.",
91+
impact = "We have no monitoring data for {{$labels.job}} - {{$labels.instance}}. At worst, it's completely down. At best, we cannot reliably respond to operational issues.",
92+
dashboardURL = "$${base_url}/admin/prometheus/targets",
93+
}
94+
`),
95+
})
96+
case configs.RuleFormatV2:
97+
return configs.RulesConfig(map[string]string{
98+
"filename.rules": makeString(`
8199
# Config no. %d.
82-
ALERT ScrapeFailed
83-
IF up != 1
84-
FOR 10m
85-
LABELS { severity="warning" }
86-
ANNOTATIONS {
87-
summary = "Scrape of {{$labels.job}} (pod: {{$labels.instance}}) failed.",
88-
description = "Prometheus cannot reach the /metrics page on the {{$labels.instance}} pod.",
89-
impact = "We have no monitoring data for {{$labels.job}} - {{$labels.instance}}. At worst, it's completely down. At best, we cannot reliably respond to operational issues.",
90-
dashboardURL = "$${base_url}/admin/prometheus/targets",
91-
}
92-
`),
93-
})
100+
groups:
101+
- name: example
102+
rules:
103+
- alert: ScrapeFailed
104+
expr: 'up != 1'
105+
for: 10m
106+
labels:
107+
severity: warning
108+
annotations:
109+
summary: "Scrape of {{$labels.job}} (pod: {{$labels.instance}}) failed."
110+
description: "Prometheus cannot reach the /metrics page on the {{$labels.instance}} pod."
111+
impact: "We have no monitoring data for {{$labels.job}} - {{$labels.instance}}. At worst, it's completely down. At best, we cannot reliably respond to operational issues."
112+
dashboardURL: "$${base_url}/admin/prometheus/targets"
113+
`),
114+
})
115+
default:
116+
panic("unknown rule format")
117+
}
94118
}
95119

96120
// parseVersionedRulesConfig parses a configs.VersionedRulesConfig from JSON.
@@ -146,7 +170,7 @@ func Test_PostConfig_CreatesConfig(t *testing.T) {
146170
defer cleanup(t)
147171

148172
userID := makeUserID()
149-
config := makeRulerConfig()
173+
config := makeRulerConfig(configs.RuleFormatV2)
150174
result := post(t, userID, nil, config)
151175
assert.Equal(t, config, result.Config)
152176
}
@@ -177,27 +201,69 @@ func Test_PostConfig_InvalidNewConfig(t *testing.T) {
177201
}
178202
}
179203

180-
// Posting to a configuration sets it so that you can get it again.
181-
func Test_PostConfig_UpdatesConfig(t *testing.T) {
204+
// Posting a v1 rule format configuration sets it so that you can get it again.
205+
func Test_PostConfig_UpdatesConfig_V1RuleFormat(t *testing.T) {
206+
setup(t)
207+
app = NewAPI(database, configs.RuleFormatV1)
208+
defer cleanup(t)
209+
210+
userID := makeUserID()
211+
config1 := makeRulerConfig(configs.RuleFormatV1)
212+
view1 := post(t, userID, nil, config1)
213+
config2 := makeRulerConfig(configs.RuleFormatV1)
214+
view2 := post(t, userID, config1, config2)
215+
assert.True(t, view2.ID > view1.ID, "%v > %v", view2.ID, view1.ID)
216+
assert.Equal(t, config2, view2.Config)
217+
}
218+
219+
// Posting an invalid v1 rule format config when there's one already set returns an error and leaves the config as is.
220+
func Test_PostConfig_InvalidChangedConfig_V1RuleFormat(t *testing.T) {
221+
setup(t)
222+
app = NewAPI(database, configs.RuleFormatV1)
223+
defer cleanup(t)
224+
225+
userID := makeUserID()
226+
config := makeRulerConfig(configs.RuleFormatV1)
227+
post(t, userID, nil, config)
228+
invalidConfig := map[string]string{
229+
"some.rules": "invalid config",
230+
}
231+
updateRequest := configUpdateRequest{
232+
OldConfig: nil,
233+
NewConfig: invalidConfig,
234+
}
235+
b, err := json.Marshal(updateRequest)
236+
require.NoError(t, err)
237+
reader := bytes.NewReader(b)
238+
{
239+
w := requestAsUser(t, app, userID, "POST", endpoint, reader)
240+
require.Equal(t, http.StatusBadRequest, w.Code)
241+
}
242+
result := get(t, userID)
243+
assert.Equal(t, config, result.Config)
244+
}
245+
246+
// Posting a v2 rule format configuration sets it so that you can get it again.
247+
func Test_PostConfig_UpdatesConfig_V2RuleFormat(t *testing.T) {
182248
setup(t)
183249
defer cleanup(t)
184250

185251
userID := makeUserID()
186-
config1 := makeRulerConfig()
252+
config1 := makeRulerConfig(configs.RuleFormatV2)
187253
view1 := post(t, userID, nil, config1)
188-
config2 := makeRulerConfig()
254+
config2 := makeRulerConfig(configs.RuleFormatV2)
189255
view2 := post(t, userID, config1, config2)
190256
assert.True(t, view2.ID > view1.ID, "%v > %v", view2.ID, view1.ID)
191257
assert.Equal(t, config2, view2.Config)
192258
}
193259

194-
// Posting an invalid config when there's one already set returns an error and leaves the config as is.
195-
func Test_PostConfig_InvalidChangedConfig(t *testing.T) {
260+
// Posting an invalid v2 rule format config when there's one already set returns an error and leaves the config as is.
261+
func Test_PostConfig_InvalidChangedConfig_V2RuleFormat(t *testing.T) {
196262
setup(t)
197263
defer cleanup(t)
198264

199265
userID := makeUserID()
200-
config := makeRulerConfig()
266+
config := makeRulerConfig(configs.RuleFormatV2)
201267
post(t, userID, nil, config)
202268
invalidConfig := map[string]string{
203269
"some.rules": "invalid config",
@@ -224,8 +290,8 @@ func Test_PostConfig_MultipleUsers(t *testing.T) {
224290

225291
userID1 := makeUserID()
226292
userID2 := makeUserID()
227-
config1 := post(t, userID1, nil, makeRulerConfig())
228-
config2 := post(t, userID2, nil, makeRulerConfig())
293+
config1 := post(t, userID1, nil, makeRulerConfig(configs.RuleFormatV2))
294+
config2 := post(t, userID2, nil, makeRulerConfig(configs.RuleFormatV2))
229295
foundConfig1 := get(t, userID1)
230296
assert.Equal(t, config1, foundConfig1)
231297
foundConfig2 := get(t, userID2)
@@ -249,7 +315,7 @@ func Test_GetAllConfigs(t *testing.T) {
249315
defer cleanup(t)
250316

251317
userID := makeUserID()
252-
config := makeRulerConfig()
318+
config := makeRulerConfig(configs.RuleFormatV2)
253319
view := post(t, userID, nil, config)
254320

255321
found, err := privateAPI.GetConfigs(0)
@@ -266,9 +332,9 @@ func Test_GetAllConfigs_Newest(t *testing.T) {
266332

267333
userID := makeUserID()
268334

269-
config1 := post(t, userID, nil, makeRulerConfig())
270-
config2 := post(t, userID, config1.Config, makeRulerConfig())
271-
lastCreated := post(t, userID, config2.Config, makeRulerConfig())
335+
config1 := post(t, userID, nil, makeRulerConfig(configs.RuleFormatV2))
336+
config2 := post(t, userID, config1.Config, makeRulerConfig(configs.RuleFormatV2))
337+
lastCreated := post(t, userID, config2.Config, makeRulerConfig(configs.RuleFormatV2))
272338

273339
found, err := privateAPI.GetConfigs(0)
274340
assert.NoError(t, err, "error getting configs")
@@ -281,10 +347,10 @@ func Test_GetConfigs_IncludesNewerConfigsAndExcludesOlder(t *testing.T) {
281347
setup(t)
282348
defer cleanup(t)
283349

284-
post(t, makeUserID(), nil, makeRulerConfig())
285-
config2 := post(t, makeUserID(), nil, makeRulerConfig())
350+
post(t, makeUserID(), nil, makeRulerConfig(configs.RuleFormatV2))
351+
config2 := post(t, makeUserID(), nil, makeRulerConfig(configs.RuleFormatV2))
286352
userID3 := makeUserID()
287-
config3 := post(t, userID3, nil, makeRulerConfig())
353+
config3 := post(t, userID3, nil, makeRulerConfig(configs.RuleFormatV2))
288354

289355
found, err := privateAPI.GetConfigs(config2.ID)
290356
assert.NoError(t, err, "error getting configs")
@@ -302,14 +368,14 @@ func postAlertmanagerConfig(t *testing.T, userID, configFile string) {
302368
b, err := json.Marshal(config)
303369
require.NoError(t, err)
304370
reader := bytes.NewReader(b)
305-
configsAPI := api.New(database)
371+
configsAPI := api.New(database, configs.RuleFormatV2)
306372
w := requestAsUser(t, configsAPI, userID, "POST", "/api/prom/configs/alertmanager", reader)
307373
require.Equal(t, http.StatusNoContent, w.Code)
308374
}
309375

310376
// getAlertmanagerConfig gets the alertmanager config from the alertmanager configs API.
311377
func getAlertmanagerConfig(t *testing.T, userID string) string {
312-
w := requestAsUser(t, api.New(database), userID, "GET", "/api/prom/configs/alertmanager", nil)
378+
w := requestAsUser(t, api.New(database, configs.RuleFormatV2), userID, "GET", "/api/prom/configs/alertmanager", nil)
313379
var x configs.View
314380
b := w.Body.Bytes()
315381
err := json.Unmarshal(b, &x)
@@ -351,7 +417,7 @@ func Test_AlertmanagerConfig_RulerConfigDoesntChangeIt(t *testing.T) {
351417
- name: noop`)
352418
postAlertmanagerConfig(t, userID, alertmanagerConfig)
353419

354-
rulerConfig := makeRulerConfig()
420+
rulerConfig := makeRulerConfig(configs.RuleFormatV2)
355421
post(t, userID, nil, rulerConfig)
356422

357423
newAlertmanagerConfig := getAlertmanagerConfig(t, userID)

pkg/ruler/ruler.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/weaveworks/common/instrument"
3434
"github.com/weaveworks/common/user"
3535
"github.com/weaveworks/cortex/pkg/chunk"
36+
"github.com/weaveworks/cortex/pkg/configs"
3637
"github.com/weaveworks/cortex/pkg/distributor"
3738
"github.com/weaveworks/cortex/pkg/querier"
3839
"github.com/weaveworks/cortex/pkg/util"
@@ -81,6 +82,9 @@ type Config struct {
8182
// This is used for template expansion in alerts; must be a valid URL
8283
ExternalURL util.URLValue
8384

85+
// Whether to parse rules according to the Prometheus v1 or v2 rule format.
86+
RuleFormatVersion configs.RuleFormatVersion
87+
8488
// How frequently to evaluate rules by default.
8589
EvaluationInterval time.Duration
8690
NumWorkers int
@@ -105,6 +109,7 @@ type Config struct {
105109
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
106110
cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil
107111
f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.")
112+
f.Var(&cfg.RuleFormatVersion, "ruler.rule-format-version", "Which Prometheus rule format version to use: '1' or '2' (default '1').")
108113
f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 15*time.Second, "How frequently to evaluate rules")
109114
f.IntVar(&cfg.NumWorkers, "ruler.num-workers", 1, "Number of rule evaluator worker routines in this process")
110115
f.Var(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "URL of the Alertmanager to send notifications to.")
@@ -286,7 +291,7 @@ func (r *Ruler) newGroup(ctx context.Context, userID string, item *workItem) (*r
286291
Registerer: prometheus.DefaultRegisterer,
287292
}
288293
delay := 0 * time.Second // Unused, so 0 value is fine.
289-
return rules.NewGroup(item.filename, "none", delay, item.rules, opts), nil
294+
return rules.NewGroup(item.groupName, "none", delay, item.rules, opts), nil
290295
}
291296

292297
// sendAlerts implements a rules.NotifyFunc for a Notifier.
@@ -402,7 +407,7 @@ type Server struct {
402407
// NewServer makes a new rule processing server.
403408
func NewServer(cfg Config, ruler *Ruler, rulesAPI RulesAPI) (*Server, error) {
404409
// TODO: Separate configuration for polling interval.
405-
s := newScheduler(rulesAPI, cfg.EvaluationInterval, cfg.EvaluationInterval)
410+
s := newScheduler(rulesAPI, cfg.EvaluationInterval, cfg.EvaluationInterval, cfg.RuleFormatVersion)
406411
if cfg.NumWorkers <= 0 {
407412
return nil, fmt.Errorf("must have at least 1 worker, got %d", cfg.NumWorkers)
408413
}

0 commit comments

Comments
 (0)