Skip to content

Update alertmanager to upstream v0.15.1 with memberlist #929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
## master / unreleased

* [FEATURE] You can now specify `http_config` on alert receivers #929
* [CHANGE] Flags changed due to changes upstream in Prometheus Alertmanager #929:
  * `alertmanager.mesh.listen-address` is now `cluster.listen-address`
  * `alertmanager.mesh.peer.host` and `alertmanager.mesh.peer.service` can be replaced by `cluster.peer`
  * `alertmanager.mesh.hardware-address`, `alertmanager.mesh.nickname`, `alertmanager.mesh.password`, and `alertmanager.mesh.peer.refresh-interval` have been removed.
* [CHANGE] Retention period should now be a multiple of periodic table duration #1564
* [FEATURE] Add option to use jump hashing to load balance requests to memcached #1554
* [FEATURE] Add status page for HA tracker to distributors #1546
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ require (
github.com/opentracing/opentracing-go v1.1.0
github.com/philhofer/fwd v0.0.0-20160129035939-98c11a7a6ec8 // indirect
github.com/pkg/errors v0.8.1
github.com/prometheus/alertmanager v0.12.1-0.20190731170042-6e135a0112f5
github.com/prometheus/alertmanager v0.15.1
github.com/prometheus/client_golang v1.0.0
github.com/prometheus/common v0.4.1
github.com/prometheus/prometheus v0.0.0-20190731144842-63ed2e28f1ac
Expand All @@ -71,7 +71,6 @@ require (
github.com/uber/jaeger-lib v2.0.0+incompatible // indirect
github.com/weaveworks/billing-client v0.0.0-20171006123215-be0d55e547b1
github.com/weaveworks/common v0.0.0-20190714171817-ddeaa31513fd
github.com/weaveworks/mesh v0.0.0-20170131170447-5015f896ab62
github.com/weaveworks/promrus v1.2.0 // indirect
go.etcd.io/bbolt v1.3.3
go.etcd.io/etcd v0.0.0-20190709142735-eb7dd97135a5
Expand Down
6 changes: 2 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,8 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/prometheus/alertmanager v0.12.1-0.20190731170042-6e135a0112f5 h1:xqYzIhRFSm2qWLNG9UkSCGOHzlg3IOc0Autvw6fobRQ=
github.com/prometheus/alertmanager v0.12.1-0.20190731170042-6e135a0112f5/go.mod h1:zdz6eCci7rHWB/8/1E/9JEfoKqCAIlxmt8EIKvHi0dI=
github.com/prometheus/alertmanager v0.15.1 h1:LioLwocIFmJvFSZMkMPwPYQ+zFwjQpMHQu6AXhmdpX4=
github.com/prometheus/alertmanager v0.15.1/go.mod h1:zdz6eCci7rHWB/8/1E/9JEfoKqCAIlxmt8EIKvHi0dI=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 h1:D+CiwcpGTW6pL6bv6KI3KbyEyCKyS+1JWS2h8PNDnGA=
Expand Down Expand Up @@ -513,8 +513,6 @@ github.com/weaveworks/billing-client v0.0.0-20171006123215-be0d55e547b1 h1:qi+Yk
github.com/weaveworks/billing-client v0.0.0-20171006123215-be0d55e547b1/go.mod h1:7gGdEUJaCrSlWi/mjd68CZv0sfqektYPDcro9cE+M9k=
github.com/weaveworks/common v0.0.0-20190714171817-ddeaa31513fd h1:yJjtAvkWEyZlz5DGkw3pL12Kbns8rOfPvDrtlIiC82A=
github.com/weaveworks/common v0.0.0-20190714171817-ddeaa31513fd/go.mod h1:pSm+0KR57BG3pvGoJWFXJSAC7+sEPewcvdt5StevL3A=
github.com/weaveworks/mesh v0.0.0-20170131170447-5015f896ab62 h1:M8NXuAqtV1qKYA0PFpdu1iJiCwiftOGz8FYfJxqq+GI=
github.com/weaveworks/mesh v0.0.0-20170131170447-5015f896ab62/go.mod h1:mcON9Ws1aW0crSErpXWp7U1ErCDEKliDX2OhVlbWRKk=
github.com/weaveworks/promrus v1.2.0 h1:jOLf6pe6/vss4qGHjXmGz4oDJQA+AOCqEL3FvvZGz7M=
github.com/weaveworks/promrus v1.2.0/go.mod h1:SaE82+OJ91yqjrE1rsvBWVzNZKcHYFtMUyS1+Ogs/KA=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
Expand Down
42 changes: 26 additions & 16 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/go-kit/kit/log"
"github.com/prometheus/alertmanager/api"
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/inhibit"
Expand All @@ -24,7 +25,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/route"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/weaveworks/mesh"
)

const notificationLogMaintenancePeriod = 15 * time.Minute
Expand All @@ -35,7 +35,8 @@ type Config struct {
// Used to persist notification logs and silences on disk.
DataDir string
Logger log.Logger
MeshRouter gossipRouter
Peer *cluster.Peer
PeerTimeout time.Duration
Retention time.Duration
ExternalURL *url.URL
}
Expand All @@ -45,7 +46,7 @@ type Alertmanager struct {
cfg *Config
api *api.API
logger log.Logger
nflog nflog.Log
nflog *nflog.Log
silences *silence.Silences
marker types.Marker
alerts *mem.Alerts
Expand All @@ -68,9 +69,6 @@ func New(cfg *Config) (*Alertmanager, error) {
nflogID := fmt.Sprintf("nflog:%s", cfg.UserID)
var err error
am.nflog, err = nflog.New(
nflog.WithMesh(func(g mesh.Gossiper) mesh.Gossip {
return cfg.MeshRouter.newGossip(nflogID, g)
}),
nflog.WithRetention(cfg.Retention),
nflog.WithSnapshot(filepath.Join(cfg.DataDir, nflogID)),
nflog.WithMaintenance(notificationLogMaintenancePeriod, am.stop, am.wg.Done),
Expand All @@ -86,22 +84,25 @@ func New(cfg *Config) (*Alertmanager, error) {

am.marker = types.NewMarker()

// TODO(cortex): Build a registry that can merge metrics from multiple users.
// For now, these metrics are ignored, as we can't register the same
// metric twice with a single registry.
localRegistry := prometheus.NewRegistry()

silencesID := fmt.Sprintf("silences:%s", cfg.UserID)
am.silences, err = silence.New(silence.Options{
SnapshotFile: filepath.Join(cfg.DataDir, silencesID),
Retention: cfg.Retention,
Logger: log.With(am.logger, "component", "silences"),
// TODO(cortex): Build a registry that can merge metrics from multiple users.
// For now, these metrics are ignored, as we can't register the same
// metric twice with a single registry.
Metrics: prometheus.NewRegistry(),
Gossip: func(g mesh.Gossiper) mesh.Gossip {
return cfg.MeshRouter.newGossip(silencesID, g)
},
Metrics: localRegistry,
})
if err != nil {
return nil, fmt.Errorf("failed to create silences: %v", err)
}
if cfg.Peer != nil {
c := cfg.Peer.AddState("sil:"+cfg.UserID, am.silences, localRegistry)
am.silences.SetBroadcast(c.Broadcast)
}

am.wg.Add(1)
go func() {
Expand All @@ -122,15 +123,15 @@ func New(cfg *Config) (*Alertmanager, error) {
return am.dispatcher.Groups(matchers)
},
marker.Status,
nil, // Passing a nil mesh router since we don't show mesh router information in Cortex anyway.
cfg.Peer,
log.With(am.logger, "component", "api"),
)

am.router = route.New()

webReload := make(chan chan error)
ui.Register(am.router.WithPrefix(am.cfg.ExternalURL.Path), webReload, log.With(am.logger, "component", "ui"))
am.api.Register(am.router.WithPrefix(path.Join(am.cfg.ExternalURL.Path, "/api")))
am.api.Register(am.router.WithPrefix(path.Join(am.cfg.ExternalURL.Path, "/api/v1")))

go func() {
for {
Expand All @@ -148,6 +149,14 @@ func New(cfg *Config) (*Alertmanager, error) {
return am, nil
}

// clusterWait returns a function that reports how long this instance should
// wait: the value of p.Position() multiplied by the base timeout. The position
// is re-evaluated on every call of the returned function rather than captured
// once.
//
// p may be nil. Other code in this file treats a nil Config.Peer as valid
// (New only wires up cluster state when cfg.Peer != nil), so a nil peer yields
// a function that always returns zero instead of dereferencing a nil pointer
// when the wait is eventually evaluated.
func clusterWait(p *cluster.Peer, timeout time.Duration) func() time.Duration {
	if p == nil {
		// No cluster to coordinate with: never delay.
		return func() time.Duration { return 0 }
	}
	return func() time.Duration {
		return time.Duration(p.Position()) * timeout
	}
}

// ApplyConfig applies a new configuration to an Alertmanager.
func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {
var (
Expand Down Expand Up @@ -178,7 +187,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {

am.inhibitor = inhibit.NewInhibitor(am.alerts, conf.InhibitRules, am.marker, log.With(am.logger, "component", "inhibitor"))

waitFunc := meshWait(am.cfg.MeshRouter, 5*time.Second)
waitFunc := clusterWait(am.cfg.Peer, am.cfg.PeerTimeout)
timeoutFunc := func(d time.Duration) time.Duration {
if d < notify.MinTimeout {
d = notify.MinTimeout
Expand All @@ -194,6 +203,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {
am.silences,
am.nflog,
am.marker,
am.cfg.Peer,
log.With(am.logger, "component", "pipeline"),
)
am.dispatcher = dispatch.NewDispatcher(
Expand Down
190 changes: 0 additions & 190 deletions pkg/alertmanager/mesh.go

This file was deleted.

Loading