Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions notify/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,11 @@ type NotificationLog interface {
}

type metrics struct {
numNotifications *prometheus.CounterVec
numFailedNotifications *prometheus.CounterVec
notificationLatencySeconds *prometheus.HistogramVec
numNotifications *prometheus.CounterVec
numTotalFailedNotifications *prometheus.CounterVec
numNotificationRequestsTotal *prometheus.CounterVec
numNotificationRequestsFailedTotal *prometheus.CounterVec
notificationLatencySeconds *prometheus.HistogramVec
}

func newMetrics(r prometheus.Registerer) *metrics {
Expand All @@ -225,11 +227,21 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "notifications_total",
Help: "The total number of attempted notifications.",
}, []string{"integration"}),
numFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
numTotalFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "notifications_failed_total",
Help: "The total number of failed notifications.",
}, []string{"integration"}),
numNotificationRequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "notification_requests_total",
Help: "The total number of attempted notification requests.",
}, []string{"integration"}),
numNotificationRequestsFailedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "notification_requests_failed_total",
Help: "The total number of failed notification requests.",
}, []string{"integration"}),
notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "alertmanager",
Name: "notification_latency_seconds",
Expand All @@ -248,10 +260,16 @@ func newMetrics(r prometheus.Registerer) *metrics {
"victorops",
} {
m.numNotifications.WithLabelValues(integration)
m.numFailedNotifications.WithLabelValues(integration)
m.numTotalFailedNotifications.WithLabelValues(integration)
m.numNotificationRequestsTotal.WithLabelValues(integration)
m.numNotificationRequestsFailedTotal.WithLabelValues(integration)
m.notificationLatencySeconds.WithLabelValues(integration)
}
r.MustRegister(m.numNotifications, m.numFailedNotifications, m.notificationLatencySeconds)
r.MustRegister(
m.numNotifications, m.numTotalFailedNotifications,
m.numNotificationRequestsTotal, m.numNotificationRequestsFailedTotal,
m.notificationLatencySeconds,
)
return m
}

Expand Down Expand Up @@ -389,7 +407,7 @@ func NewGossipSettleStage(p *cluster.Peer) *GossipSettleStage {
return &GossipSettleStage{peer: p}
}

func (n *GossipSettleStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
func (n *GossipSettleStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
if n.peer != nil {
n.peer.WaitReady()
}
Expand All @@ -407,7 +425,7 @@ func NewMuteStage(m types.Muter) *MuteStage {
}

// Exec implements the Stage interface.
func (n *MuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
func (n *MuteStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
var filtered []*types.Alert
for _, a := range alerts {
// TODO(fabxc): increment total alerts counter.
Expand All @@ -434,7 +452,7 @@ func NewWaitStage(wait func() time.Duration) *WaitStage {
}

// Exec implements the Stage interface.
func (ws *WaitStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
func (ws *WaitStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
select {
case <-time.After(ws.wait()):
case <-ctx.Done():
Expand Down Expand Up @@ -541,7 +559,7 @@ func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint
}

// Exec implements the Stage interface.
func (n *DedupStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
func (n *DedupStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
gkey, ok := GroupKey(ctx)
if !ok {
return ctx, nil, errors.New("group key missing")
Expand Down Expand Up @@ -609,8 +627,16 @@ func NewRetryStage(i Integration, groupName string, metrics *metrics) *RetryStag
}
}

// Exec implements the Stage interface.
func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
ctx, alerts, err := r.exec(ctx, l, alerts...)
if err != nil {
r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name()).Inc()
}
return ctx, alerts, err
}

func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
var sent []*types.Alert

// If we shouldn't send notifications for resolved alerts, but there are only
Expand Down Expand Up @@ -663,9 +689,9 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
now := time.Now()
retry, err := r.integration.Notify(ctx, sent...)
r.metrics.notificationLatencySeconds.WithLabelValues(r.integration.Name()).Observe(time.Since(now).Seconds())
r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
r.metrics.numNotificationRequestsTotal.WithLabelValues(r.integration.Name()).Inc()
if err != nil {
r.metrics.numFailedNotifications.WithLabelValues(r.integration.Name()).Inc()
r.metrics.numNotificationRequestsFailedTotal.WithLabelValues(r.integration.Name()).Inc()
if !retry {
return ctx, alerts, errors.Wrapf(err, "%s/%s: notify retry canceled due to unrecoverable error after %d attempts", r.groupName, r.integration.String(), i)
}
Expand Down