Skip to content

Commit a1d4089

Browse files
committed
Make Cortex work with updated Prometheus packages
1 parent 1af41fd commit a1d4089

File tree

4 files changed

+127
-27
lines changed

4 files changed

+127
-27
lines changed

cmd/lite/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/prometheus/prometheus/config"
1414
"github.com/prometheus/prometheus/promql"
1515
"github.com/prometheus/prometheus/web/api/v1"
16+
"github.com/prometheus/tsdb"
1617
"google.golang.org/grpc"
1718

1819
"github.com/weaveworks/common/middleware"
@@ -155,6 +156,8 @@ func main() {
155156
querier.DummyAlertmanagerRetriever{},
156157
func() config.Config { return config.Config{} },
157158
func(f http.HandlerFunc) http.HandlerFunc { return f },
159+
func() *tsdb.DB { return nil }, // Only needed for admin APIs.
160+
false, // Disable admin APIs.
158161
)
159162
promRouter := route.New().WithPrefix("/api/prom/api/v1")
160163
api.Register(promRouter)

cmd/querier/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/prometheus/prometheus/config"
1414
"github.com/prometheus/prometheus/promql"
1515
"github.com/prometheus/prometheus/web/api/v1"
16+
"github.com/prometheus/tsdb"
1617

1718
"github.com/weaveworks/common/middleware"
1819
"github.com/weaveworks/common/server"
@@ -98,6 +99,8 @@ func main() {
9899
querier.DummyAlertmanagerRetriever{},
99100
func() config.Config { return config.Config{} },
100101
func(f http.HandlerFunc) http.HandlerFunc { return f },
102+
func() *tsdb.DB { return nil }, // Only needed for admin APIs.
103+
false, // Disable admin APIs.
101104
)
102105
promRouter := route.New().WithPrefix("/api/prom/api/v1")
103106
api.Register(promRouter)

pkg/querier/querier.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,27 +195,28 @@ type mergeQuerier struct {
195195
metadataOnly bool
196196
}
197197

198-
func (mq mergeQuerier) Select(matchers ...*labels.Matcher) storage.SeriesSet {
198+
func (mq mergeQuerier) Select(matchers ...*labels.Matcher) (storage.SeriesSet, error) {
199+
// TODO: Update underlying selectors to return errors directly.
199200
if mq.metadataOnly {
200-
return mq.selectMetadata(matchers...)
201+
return mq.selectMetadata(matchers...), nil
201202
}
202-
return mq.selectSamples(matchers...)
203+
return mq.selectSamples(matchers...), nil
203204
}
204205

205206
func (mq mergeQuerier) selectMetadata(matchers ...*labels.Matcher) storage.SeriesSet {
206207
// NB that we don't do this in parallel, as in practice we only have two queriers,
207208
// one of which is the chunk store, which doesn't implement this yet.
208-
var set storage.SeriesSet
209+
seriesSets := make([]storage.SeriesSet, 0, len(mq.queriers))
209210
for _, q := range mq.queriers {
210211
ms, err := q.MetricsForLabelMatchers(mq.ctx, model.Time(mq.mint), model.Time(mq.maxt), matchers...)
211212
if err != nil {
212213
return errSeriesSet{err: err}
213214
}
214215
ss := metricsToSeriesSet(ms)
215-
set = storage.DeduplicateSeriesSet(set, ss)
216+
seriesSets = append(seriesSets, ss)
216217
}
217218

218-
return set
219+
return storage.NewMergeSeriesSet(seriesSets)
219220
}
220221

221222
func (mq mergeQuerier) selectSamples(matchers ...*labels.Matcher) storage.SeriesSet {

pkg/ruler/ruler.go

Lines changed: 114 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package ruler
22

33
import (
4+
native_ctx "context"
45
"flag"
56
"fmt"
67
"net/http"
@@ -12,11 +13,17 @@ import (
1213
gklog "github.com/go-kit/kit/log"
1314
"github.com/go-kit/kit/log/level"
1415
"github.com/prometheus/client_golang/prometheus"
16+
config_util "github.com/prometheus/common/config"
1517
"github.com/prometheus/common/model"
1618
"github.com/prometheus/prometheus/config"
19+
"github.com/prometheus/prometheus/discovery"
20+
sd_config "github.com/prometheus/prometheus/discovery/config"
21+
"github.com/prometheus/prometheus/discovery/dns"
22+
"github.com/prometheus/prometheus/discovery/targetgroup"
1723
"github.com/prometheus/prometheus/notifier"
1824
"github.com/prometheus/prometheus/promql"
1925
"github.com/prometheus/prometheus/rules"
26+
"github.com/prometheus/prometheus/util/strutil"
2027
"golang.org/x/net/context"
2128
"golang.org/x/net/context/ctxhttp"
2229

@@ -97,7 +104,59 @@ type Ruler struct {
97104

98105
// Per-user notifiers with separate queues.
99106
notifiersMtx sync.Mutex
100-
notifiers map[string]*notifier.Notifier
107+
notifiers map[string]*rulerNotifier
108+
}
109+
110+
type rulerNotifier struct {
111+
notifier *notifier.Notifier
112+
sdCtx context.Context
113+
sdCancel context.CancelFunc
114+
sdManager *discovery.Manager
115+
wg sync.WaitGroup
116+
logger gklog.Logger
117+
}
118+
119+
func newRulerNotifier(o *notifier.Options, l gklog.Logger) *rulerNotifier {
120+
ctx, cancel := context.WithCancel(context.Background())
121+
return &rulerNotifier{
122+
notifier: notifier.New(o, l),
123+
sdCtx: ctx,
124+
sdCancel: cancel,
125+
sdManager: discovery.NewManager(l),
126+
logger: l,
127+
}
128+
}
129+
130+
func (rn *rulerNotifier) run() {
131+
rn.wg.Add(2)
132+
go func() {
133+
if err := rn.sdManager.Run(rn.sdCtx); err != nil {
134+
level.Error(rn.logger).Log("msg", "error starting notifier discovery manager", "err", err)
135+
}
136+
}()
137+
go func() {
138+
rn.notifier.Run(rn.sdManager.SyncCh())
139+
}()
140+
}
141+
142+
func (rn *rulerNotifier) applyConfig(cfg *config.Config) error {
143+
if err := rn.notifier.ApplyConfig(cfg); err != nil {
144+
return err
145+
}
146+
147+
amConfigs := cfg.AlertingConfig.AlertmanagerConfigs
148+
if len(amConfigs) != 1 {
149+
return fmt.Errorf("ruler alerting config should have exactly one AlertmanagerConfig")
150+
}
151+
return rn.sdManager.ApplyConfig(
152+
map[string]sd_config.ServiceDiscoveryConfig{"ruler": amConfigs[0].ServiceDiscoveryConfig},
153+
)
154+
}
155+
156+
func (rn *rulerNotifier) stop() {
157+
rn.sdCancel()
158+
rn.notifier.Stop()
159+
rn.wg.Wait()
101160
}
102161

103162
// NewRuler creates a new ruler from a distributor and chunk store.
@@ -112,7 +171,7 @@ func NewRuler(cfg Config, d *distributor.Distributor, c *chunk.Store) (*Ruler, e
112171
alertURL: cfg.ExternalURL.URL,
113172
notifierCfg: ncfg,
114173
queueCapacity: cfg.NotificationQueueCapacity,
115-
notifiers: map[string]*notifier.Notifier{},
174+
notifiers: map[string]*rulerNotifier{},
116175
}, nil
117176
}
118177

@@ -124,23 +183,23 @@ func buildNotifierConfig(rulerConfig *Config) (*config.Config, error) {
124183
}
125184

126185
u := rulerConfig.AlertmanagerURL
127-
var sdConfig config.ServiceDiscoveryConfig
186+
var sdConfig sd_config.ServiceDiscoveryConfig
128187
if rulerConfig.AlertmanagerDiscovery {
129188
if !strings.Contains(u.Host, "_tcp.") {
130189
return nil, fmt.Errorf("When alertmanager-discovery is on, host name must be of the form _portname._tcp.service.fqdn (is %q)", u.Host)
131190
}
132-
dnsSDConfig := config.DNSSDConfig{
191+
dnsSDConfig := dns.SDConfig{
133192
Names: []string{u.Host},
134193
RefreshInterval: model.Duration(rulerConfig.AlertmanagerRefreshInterval),
135194
Type: "SRV",
136195
Port: 0, // Ignored, because of SRV.
137196
}
138-
sdConfig = config.ServiceDiscoveryConfig{
139-
DNSSDConfigs: []*config.DNSSDConfig{&dnsSDConfig},
197+
sdConfig = sd_config.ServiceDiscoveryConfig{
198+
DNSSDConfigs: []*dns.SDConfig{&dnsSDConfig},
140199
}
141200
} else {
142-
sdConfig = config.ServiceDiscoveryConfig{
143-
StaticConfigs: []*config.TargetGroup{
201+
sdConfig = sd_config.ServiceDiscoveryConfig{
202+
StaticConfigs: []*targetgroup.Group{
144203
{
145204
Targets: []model.LabelSet{
146205
{
@@ -165,14 +224,14 @@ func buildNotifierConfig(rulerConfig *Config) (*config.Config, error) {
165224
}
166225

167226
if u.User != nil {
168-
amConfig.HTTPClientConfig = config.HTTPClientConfig{
169-
BasicAuth: &config.BasicAuth{
227+
amConfig.HTTPClientConfig = config_util.HTTPClientConfig{
228+
BasicAuth: &config_util.BasicAuth{
170229
Username: u.User.Username(),
171230
},
172231
}
173232

174233
if password, isSet := u.User.Password(); isSet {
175-
amConfig.HTTPClientConfig.BasicAuth.Password = config.Secret(password)
234+
amConfig.HTTPClientConfig.BasicAuth.Password = config_util.Secret(password)
176235
}
177236
}
178237

@@ -191,26 +250,59 @@ func (r *Ruler) newGroup(ctx context.Context, rs []rules.Rule) (*rules.Group, er
191250
}
192251
opts := &rules.ManagerOptions{
193252
Appendable: appendable,
194-
QueryEngine: r.engine,
253+
QueryFunc: rules.EngineQueryFunc(r.engine),
195254
Context: ctx,
196255
ExternalURL: r.alertURL,
197-
Notifier: notifier,
256+
NotifyFunc: sendAlerts(notifier, r.alertURL.String()),
198257
Logger: gklog.NewNopLogger(),
258+
Registerer: prometheus.DefaultRegisterer,
199259
}
200260
delay := 0 * time.Second // Unused, so 0 value is fine.
201261
return rules.NewGroup("default", "none", delay, rs, opts), nil
202262
}
203263

264+
// sendAlerts implements the rules.NotifyFunc for a Notifier.
265+
// It filters any non-firing alerts from the input.
266+
//
267+
// Copied from Prometheus's main.go.
268+
func sendAlerts(n *notifier.Notifier, externalURL string) rules.NotifyFunc {
269+
return func(ctx native_ctx.Context, expr string, alerts ...*rules.Alert) error {
270+
var res []*notifier.Alert
271+
272+
for _, alert := range alerts {
273+
// Only send actually firing alerts.
274+
if alert.State == rules.StatePending {
275+
continue
276+
}
277+
a := &notifier.Alert{
278+
StartsAt: alert.FiredAt,
279+
Labels: alert.Labels,
280+
Annotations: alert.Annotations,
281+
GeneratorURL: externalURL + strutil.TableLinkForExpression(expr),
282+
}
283+
if !alert.ResolvedAt.IsZero() {
284+
a.EndsAt = alert.ResolvedAt
285+
}
286+
res = append(res, a)
287+
}
288+
289+
if len(alerts) > 0 {
290+
n.Send(res...)
291+
}
292+
return nil
293+
}
294+
}
295+
204296
func (r *Ruler) getOrCreateNotifier(userID string) (*notifier.Notifier, error) {
205297
r.notifiersMtx.Lock()
206298
defer r.notifiersMtx.Unlock()
207299

208300
n, ok := r.notifiers[userID]
209301
if ok {
210-
return n, nil
302+
return n.notifier, nil
211303
}
212304

213-
n = notifier.New(&notifier.Options{
305+
n = newRulerNotifier(&notifier.Options{
214306
QueueCapacity: r.queueCapacity,
215307
Do: func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) {
216308
// Note: The passed-in context comes from the Prometheus rule group code
@@ -222,17 +314,18 @@ func (r *Ruler) getOrCreateNotifier(userID string) (*notifier.Notifier, error) {
222314
}
223315
return ctxhttp.Do(ctx, client, req)
224316
},
225-
}, gklog.NewNopLogger())
317+
}, util.Logger)
318+
319+
go n.run()
226320

227321
// This should never fail, unless there's a programming mistake.
228-
if err := n.ApplyConfig(r.notifierCfg); err != nil {
322+
if err := n.applyConfig(r.notifierCfg); err != nil {
229323
return nil, err
230324
}
231-
go n.Run()
232325

233326
// TODO: Remove notifiers for stale users. Right now this is a slow leak.
234327
r.notifiers[userID] = n
235-
return n, nil
328+
return n.notifier, nil
236329
}
237330

238331
// Evaluate a list of rules in the given context.
@@ -245,7 +338,7 @@ func (r *Ruler) Evaluate(ctx context.Context, rs []rules.Rule) {
245338
level.Error(logger).Log("msg", "failed to create rule group", "err", err)
246339
return
247340
}
248-
g.Eval(start)
341+
g.Eval(ctx, start)
249342

250343
// The prometheus routines we're calling have their own instrumentation
251344
// but, a) it's rule-based, not group-based, b) it's a summary, not a
@@ -260,7 +353,7 @@ func (r *Ruler) Stop() {
260353
defer r.notifiersMtx.Unlock()
261354

262355
for _, n := range r.notifiers {
263-
n.Stop()
356+
n.stop()
264357
}
265358
}
266359

0 commit comments

Comments
 (0)