More visibility on ingesters #68


Merged: 8 commits, Oct 25, 2016
31 changes: 28 additions & 3 deletions distributor.go
@@ -31,7 +31,7 @@ import (

 var (
 	numClientsDesc = prometheus.NewDesc(
-		"prometheus_distributor_ingester_clients",
+		"prism_distributor_ingester_clients",
 		"The current number of ingester clients.",
 		nil, nil,
 	)
@@ -51,6 +51,7 @@ type Distributor struct {
 	ingesterAppendFailures *prometheus.CounterVec
 	ingesterQueries        *prometheus.CounterVec
 	ingesterQueryFailures  *prometheus.CounterVec
+	ingestersAlive         *prometheus.Desc
 }

 // ReadRing represents the read interface to the ring.
@@ -114,7 +115,7 @@ func NewDistributor(cfg DistributorConfig) (*Distributor, error) {
 		}, []string{"ingester"}),
 		ingesterAppendFailures: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Namespace: "prism",
-			Name:      "distributor_ingester_appends_total",
+			Name:      "distributor_ingester_append_failures_total",
 			Help:      "The total number of failed batch appends sent to ingesters.",
 		}, []string{"ingester"}),
 		ingesterQueries: prometheus.NewCounterVec(prometheus.CounterOpts{
@@ -124,9 +125,14 @@ func NewDistributor(cfg DistributorConfig) (*Distributor, error) {
 		}, []string{"ingester"}),
 		ingesterQueryFailures: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Namespace: "prism",
-			Name:      "distributor_ingester_appends_total",
+			Name:      "distributor_ingester_query_failures_total",
 			Help:      "The total number of failed queries sent to ingesters.",
 		}, []string{"ingester"}),
+		ingestersAlive: prometheus.NewDesc(
+			"prism_distributor_ingesters_alive",
+			"Number of ingesters in the ring that have heartbeats within timeout.",
+			nil, nil,
+		),
 	}, nil
 }

@@ -257,6 +263,10 @@ func (d *Distributor) Query(ctx context.Context, from, to model.Time, matchers ...
 		return err
 	}

+	if len(ingesters) < d.cfg.MinReadSuccesses {
+		return fmt.Errorf("could only find %d ingesters for query. Need at least %d", len(ingesters), d.cfg.MinReadSuccesses)
+	}
+
 	// Fetch samples from multiple ingesters and group them by fingerprint (unsorted
 	// and with overlap).
 	successes := 0
@@ -363,6 +373,11 @@ func (d *Distributor) Describe(ch chan<- *prometheus.Desc) {
 	d.sendDuration.Describe(ch)
 	d.cfg.Ring.Describe(ch)
 	ch <- numClientsDesc
+	d.ingesterAppends.Describe(ch)
+	d.ingesterAppendFailures.Describe(ch)
+	d.ingesterQueries.Describe(ch)
+	d.ingesterQueryFailures.Describe(ch)
+	ch <- d.ingestersAlive
 }

// Collect implements prometheus.Collector.
@@ -371,6 +386,16 @@ func (d *Distributor) Collect(ch chan<- prometheus.Metric) {
 	ch <- d.receivedSamples
 	d.sendDuration.Collect(ch)
 	d.cfg.Ring.Collect(ch)
+	d.ingesterAppends.Collect(ch)
+	d.ingesterAppendFailures.Collect(ch)
+	d.ingesterQueries.Collect(ch)
+	d.ingesterQueryFailures.Collect(ch)
+
+	ch <- prometheus.MustNewConstMetric(
+		d.ingestersAlive,
+		prometheus.GaugeValue,
+		float64(len(d.cfg.Ring.GetAll(d.cfg.HeartbeatTimeout))),
+	)

 	d.clientsMtx.RLock()
 	defer d.clientsMtx.RUnlock()

Review comment (Contributor), on the GetAll line above:

    This metric might strengthen the argument for moving the heartbeat timeout
    into the ring completely. We have the total-ingesters metric in the ring,
    but the one about alive ones in the distributor, bleh. Just something to
    keep in mind though; fine for now.

Reply (Author):

    As long as the Ring is inextricably tied to IngesterDesc, sure, why not?
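For readers less familiar with the pattern this PR uses throughout (hold a *prometheus.Desc on the struct, then emit a value computed at scrape time via prometheus.MustNewConstMetric in Collect, instead of maintaining a stateful Gauge), here is a minimal, self-contained sketch. The aliveCollector type, its heartbeats stub, and the listen address are illustrative assumptions, not code from this repository:

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// aliveCollector demonstrates the const-metric pattern: it holds only a
// *prometheus.Desc and computes the exported value on every scrape.
type aliveCollector struct {
	desc       *prometheus.Desc
	heartbeats func() []time.Time // stand-in for the ring's heartbeat data (hypothetical)
	timeout    time.Duration
}

// Describe sends the static description, just like Distributor.Describe above.
func (c *aliveCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- c.desc
}

// Collect counts heartbeats within the timeout and emits the result as a
// freshly built const metric, mirroring the ingestersAlive metric in this PR.
func (c *aliveCollector) Collect(ch chan<- prometheus.Metric) {
	alive := 0
	for _, hb := range c.heartbeats() {
		if time.Since(hb) <= c.timeout {
			alive++
		}
	}
	ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(alive))
}

func main() {
	c := &aliveCollector{
		desc: prometheus.NewDesc(
			"prism_distributor_ingesters_alive",
			"Number of ingesters in the ring that have heartbeats within timeout.",
			nil, nil,
		),
		heartbeats: func() []time.Time { return []time.Time{time.Now()} }, // one fresh heartbeat
		timeout:    time.Minute,
	}
	prometheus.MustRegister(c)
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}

With the stub above, scraping :8080/metrics should show prism_distributor_ingesters_alive 1. Because the value is recomputed on every scrape, nothing on the write path has to update a gauge, which is why the PR can derive the metric straight from Ring.GetAll.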
22 changes: 11 additions & 11 deletions ring/ring.go
@@ -50,8 +50,8 @@ type Ring struct {
 	ringDesc Desc

 	ingesterOwnershipDesc *prometheus.Desc
-	ingesterTotalDesc     *prometheus.Desc
-	tokensTotalDesc       *prometheus.Desc
+	numIngestersDesc      *prometheus.Desc
+	numTokensDesc         *prometheus.Desc
 }

// New creates a new Ring
@@ -61,17 +61,17 @@ func New(client CoordinationStateClient) *Ring {
 		quit: make(chan struct{}),
 		done: make(chan struct{}),
 		ingesterOwnershipDesc: prometheus.NewDesc(
-			"prometheus_distributor_ingester_ownership_percent",
+			"prism_distributor_ingester_ownership_percent",
 			"The percent ownership of the ring by ingester",
 			[]string{"ingester"}, nil,
 		),
-		ingesterTotalDesc: prometheus.NewDesc(
-			"prometheus_distributor_ingesters_total",
+		numIngestersDesc: prometheus.NewDesc(
+			"prism_distributor_ingesters",
 			"Number of ingesters in the ring",
 			nil, nil,
 		),
-		tokensTotalDesc: prometheus.NewDesc(
-			"prometheus_distributor_tokens_total",
+		numTokensDesc: prometheus.NewDesc(
+			"prism_distributor_tokens",
 			"Number of tokens in the ring",
 			nil, nil,
 		),
@@ -166,8 +166,8 @@ func (r *Ring) search(key uint32) int {
 // Describe implements prometheus.Collector.
 func (r *Ring) Describe(ch chan<- *prometheus.Desc) {
 	ch <- r.ingesterOwnershipDesc
-	ch <- r.ingesterTotalDesc
-	ch <- r.tokensTotalDesc
+	ch <- r.numIngestersDesc
+	ch <- r.numTokensDesc
 }

// Collect implements prometheus.Collector.
@@ -196,12 +196,12 @@ func (r *Ring) Collect(ch chan<- prometheus.Metric) {
 	}

 	ch <- prometheus.MustNewConstMetric(
-		r.ingesterTotalDesc,
+		r.numIngestersDesc,
 		prometheus.GaugeValue,
 		float64(len(r.ringDesc.Ingesters)),
 	)
 	ch <- prometheus.MustNewConstMetric(
-		r.tokensTotalDesc,
+		r.numTokensDesc,
 		prometheus.GaugeValue,
 		float64(len(r.ringDesc.Tokens)),
 	)
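On the review suggestion above (moving the heartbeat timeout into the ring), a hypothetical sketch of how Ring.Collect could emit the alive count next to the ingester and token totals. The heartbeatTimeout and numAliveIngestersDesc fields and the Timestamp on each ingester descriptor are assumptions for illustration; none of this is part of the PR:

// Hypothetical follow-up, not part of this change: derive the alive count
// inside the Ring, where the ingester descriptors already live.
func (r *Ring) collectAlive(ch chan<- prometheus.Metric) {
	alive := 0
	for _, ing := range r.ringDesc.Ingesters {
		// Assumes each ingester descriptor records its last heartbeat time
		// and that "time" is imported.
		if time.Since(ing.Timestamp) <= r.heartbeatTimeout {
			alive++
		}
	}
	ch <- prometheus.MustNewConstMetric(
		r.numAliveIngestersDesc, // hypothetical Desc, e.g. "prism_distributor_ingesters_alive"
		prometheus.GaugeValue,
		float64(alive),
	)
}

That would let the distributor drop its own copy of the timeout, which is the coupling the reviewer was pointing at.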