Skip to content

Commit d805c18

Browse files
committed
Update for newer upstream alertmanager
1 parent a2839d2 commit d805c18

File tree

4 files changed

+119
-328
lines changed

4 files changed

+119
-328
lines changed

pkg/alertmanager/alertmanager.go

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/go-kit/kit/log"
1313
"github.com/prometheus/alertmanager/api"
14+
"github.com/prometheus/alertmanager/cluster"
1415
"github.com/prometheus/alertmanager/config"
1516
"github.com/prometheus/alertmanager/dispatch"
1617
"github.com/prometheus/alertmanager/inhibit"
@@ -24,7 +25,6 @@ import (
2425
"github.com/prometheus/client_golang/prometheus"
2526
"github.com/prometheus/common/route"
2627
"github.com/prometheus/prometheus/pkg/labels"
27-
"github.com/weaveworks/mesh"
2828
)
2929

3030
const notificationLogMaintenancePeriod = 15 * time.Minute
@@ -35,31 +35,33 @@ type Config struct {
3535
// Used to persist notification logs and silences on disk.
3636
DataDir string
3737
Logger log.Logger
38-
MeshRouter gossipRouter
3938
Retention time.Duration
4039
ExternalURL *url.URL
4140
}
4241

4342
// An Alertmanager manages the alerts for one user.
4443
type Alertmanager struct {
// NOTE: this is a diff rendering. Rows marked "NN-" precede the field list
// before the commit; rows marked "NN+" precede the field list after it.
45-
cfg *Config
46-
api *api.API
47-
logger log.Logger
48-
nflog nflog.Log
49-
silences *silence.Silences
50-
marker types.Marker
51-
alerts *mem.Alerts
52-
dispatcher *dispatch.Dispatcher
53-
inhibitor *inhibit.Inhibitor
54-
stop chan struct{}
55-
wg sync.WaitGroup
56-
router *route.Router
44+
cfg *Config
45+
api *api.API
46+
logger log.Logger
// nflog changes from nflog.Log to *nflog.Log in this commit; New assigns it
// from the result of nflog.New.
47+
nflog *nflog.Log
48+
silences *silence.Silences
49+
marker types.Marker
50+
alerts *mem.Alerts
51+
dispatcher *dispatch.Dispatcher
52+
inhibitor *inhibit.Inhibitor
53+
stop chan struct{}
54+
wg sync.WaitGroup
55+
router *route.Router
// peer is the cluster peer handed to New. New only registers silence state
// on it when it is non-nil, so a nil peer appears to be permitted.
56+
peer *cluster.Peer
// peerTimeout is the base timeout that ApplyConfig passes to clusterWait.
// NOTE(review): New takes a peerTimeout argument, but the struct literal
// visible in this diff sets only cfg, peer, logger and stop — confirm that
// peerTimeout is actually stored, otherwise the wait is always zero.
57+
peerTimeout time.Duration
5758
}
5859

5960
// New creates a new Alertmanager.
60-
func New(cfg *Config) (*Alertmanager, error) {
61+
func New(peer *cluster.Peer, peerTimeout time.Duration, cfg *Config) (*Alertmanager, error) {
6162
am := &Alertmanager{
6263
cfg: cfg,
64+
peer: peer,
6365
logger: log.With(cfg.Logger, "user", cfg.UserID),
6466
stop: make(chan struct{}),
6567
}
@@ -68,9 +70,6 @@ func New(cfg *Config) (*Alertmanager, error) {
6870
nflogID := fmt.Sprintf("nflog:%s", cfg.UserID)
6971
var err error
7072
am.nflog, err = nflog.New(
71-
nflog.WithMesh(func(g mesh.Gossiper) mesh.Gossip {
72-
return cfg.MeshRouter.newGossip(nflogID, g)
73-
}),
7473
nflog.WithRetention(cfg.Retention),
7574
nflog.WithSnapshot(filepath.Join(cfg.DataDir, nflogID)),
7675
nflog.WithMaintenance(notificationLogMaintenancePeriod, am.stop, am.wg.Done),
@@ -86,22 +85,25 @@ func New(cfg *Config) (*Alertmanager, error) {
8685

8786
am.marker = types.NewMarker()
8887

88+
// TODO(cortex): Build a registry that can merge metrics from multiple users.
89+
// For now, these metrics are ignored, as we can't register the same
90+
// metric twice with a single registry.
91+
localRegistry := prometheus.NewRegistry()
92+
8993
silencesID := fmt.Sprintf("silences:%s", cfg.UserID)
9094
am.silences, err = silence.New(silence.Options{
9195
SnapshotFile: filepath.Join(cfg.DataDir, silencesID),
9296
Retention: cfg.Retention,
9397
Logger: log.With(am.logger, "component", "silences"),
94-
// TODO(cortex): Build a registry that can merge metrics from multiple users.
95-
// For now, these metrics are ignored, as we can't register the same
96-
// metric twice with a single registry.
97-
Metrics: prometheus.NewRegistry(),
98-
Gossip: func(g mesh.Gossiper) mesh.Gossip {
99-
return cfg.MeshRouter.newGossip(silencesID, g)
100-
},
98+
Metrics: localRegistry,
10199
})
102100
if err != nil {
103101
return nil, fmt.Errorf("failed to create silences: %v", err)
104102
}
103+
if peer != nil {
104+
c := peer.AddState("sil:"+cfg.UserID, am.silences, localRegistry)
105+
am.silences.SetBroadcast(c.Broadcast)
106+
}
105107

106108
am.wg.Add(1)
107109
go func() {
@@ -122,7 +124,7 @@ func New(cfg *Config) (*Alertmanager, error) {
122124
return am.dispatcher.Groups(matchers)
123125
},
124126
marker.Status,
125-
nil, // Passing a nil mesh router since we don't show mesh router information in Cortex anyway.
127+
peer,
126128
log.With(am.logger, "component", "api"),
127129
)
128130

@@ -148,6 +150,14 @@ func New(cfg *Config) (*Alertmanager, error) {
148150
return am, nil
149151
}
150152

153+
// clusterWait returns a function that inspects the current peer state and returns
154+
// a duration of one base timeout for each peer with a higher ID than ourselves.
155+
func clusterWait(p *cluster.Peer, timeout time.Duration) func() time.Duration {
156+
return func() time.Duration {
157+
return time.Duration(p.Position()) * timeout
158+
}
159+
}
160+
151161
// ApplyConfig applies a new configuration to an Alertmanager.
152162
func (am *Alertmanager) ApplyConfig(conf *config.Config) error {
153163
var (
@@ -176,7 +186,7 @@ func (am *Alertmanager) ApplyConfig(conf *config.Config) error {
176186

177187
am.inhibitor = inhibit.NewInhibitor(am.alerts, conf.InhibitRules, am.marker, log.With(am.logger, "component", "inhibitor"))
178188

179-
waitFunc := meshWait(am.cfg.MeshRouter, 5*time.Second)
189+
waitFunc := clusterWait(am.peer, am.peerTimeout)
180190
timeoutFunc := func(d time.Duration) time.Duration {
181191
if d < notify.MinTimeout {
182192
d = notify.MinTimeout
@@ -192,6 +202,7 @@ func (am *Alertmanager) ApplyConfig(conf *config.Config) error {
192202
am.silences,
193203
am.nflog,
194204
am.marker,
205+
am.peer,
195206
log.With(am.logger, "component", "pipeline"),
196207
)
197208
am.dispatcher = dispatch.NewDispatcher(

pkg/alertmanager/mesh.go

Lines changed: 0 additions & 190 deletions
This file was deleted.

0 commit comments

Comments
 (0)