Skip to content

Commit 2e01fdb

Browse files
committed
Add metrics to track ingesting native histograms
Signed-off-by: SungJin1212 <[email protected]>
1 parent 24efa2b commit 2e01fdb

File tree

5 files changed

+92
-32
lines changed

5 files changed

+92
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* [FEATURE] Store Gateway: Add an in-memory chunk cache. #6245
1717
* [FEATURE] Chunk Cache: Support multi level cache and add metrics. #6249
1818
* [FEATURE] Distributor: Accept multiple HA Tracker pairs in the same request. #6256
19+
* [ENHANCEMENT] Ingester: Add metrics to track succeeded/failed native histograms. #6370
1920
* [ENHANCEMENT] Query Frontend/Querier: Add an experimental flag `-querier.enable-promql-experimental-functions` to enable experimental promQL functions. #6355
2021
* [ENHANCEMENT] OTLP: Add `-distributor.otlp-max-recv-msg-size` flag to limit OTLP request size in bytes. #6333
2122
* [ENHANCEMENT] S3 Bucket Client: Add a list objects version configs to configure list api object version. #6280

pkg/ingester/ingester.go

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,19 +1122,21 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
11221122
// Keep track of some stats which are tracked only if the samples will be
11231123
// successfully committed
11241124
var (
1125-
succeededSamplesCount = 0
1126-
failedSamplesCount = 0
1127-
succeededExemplarsCount = 0
1128-
failedExemplarsCount = 0
1129-
startAppend = time.Now()
1130-
sampleOutOfBoundsCount = 0
1131-
sampleOutOfOrderCount = 0
1132-
sampleTooOldCount = 0
1133-
newValueForTimestampCount = 0
1134-
perUserSeriesLimitCount = 0
1135-
perLabelSetSeriesLimitCount = 0
1136-
perMetricSeriesLimitCount = 0
1137-
nativeHistogramCount = 0
1125+
succeededSamplesCount = 0
1126+
failedSamplesCount = 0
1127+
succeededHistogramCount = 0
1128+
failedHistogramCount = 0
1129+
succeededExemplarsCount = 0
1130+
failedExemplarsCount = 0
1131+
startAppend = time.Now()
1132+
sampleOutOfBoundsCount = 0
1133+
sampleOutOfOrderCount = 0
1134+
sampleTooOldCount = 0
1135+
newValueForTimestampCount = 0
1136+
perUserSeriesLimitCount = 0
1137+
perLabelSetSeriesLimitCount = 0
1138+
perMetricSeriesLimitCount = 0
1139+
discardedNativeHistogramCount = 0
11381140

11391141
updateFirstPartial = func(errFn func() error) {
11401142
if firstPartialErr == nil {
@@ -1213,8 +1215,10 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
12131215
tsLabelsHash := tsLabels.Hash()
12141216
ref, copiedLabels := app.GetRef(tsLabels, tsLabelsHash)
12151217

1216-
// To find out if any sample was added to this series, we keep old value.
1218+
// To find out if any sample was added to this series, we keep old value.
12171219
oldSucceededSamplesCount := succeededSamplesCount
1220+
// To find out if any histogram was added to this series, we keep old value.
1221+
oldSucceededHistogramsCount := succeededHistogramCount
12181222

12191223
for _, s := range ts.Samples {
12201224
var err error
@@ -1266,19 +1270,19 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
12661270

12671271
if ref != 0 {
12681272
if _, err = app.AppendHistogram(ref, copiedLabels, hp.TimestampMs, h, fh); err == nil {
1269-
succeededSamplesCount++
1273+
succeededHistogramCount++
12701274
continue
12711275
}
12721276
} else {
12731277
// Copy the label set because both TSDB and the active series tracker may retain it.
12741278
copiedLabels = cortexpb.FromLabelAdaptersToLabelsWithCopy(ts.Labels)
12751279
if ref, err = app.AppendHistogram(0, copiedLabels, hp.TimestampMs, h, fh); err == nil {
1276-
succeededSamplesCount++
1280+
succeededHistogramCount++
12771281
continue
12781282
}
12791283
}
12801284

1281-
failedSamplesCount++
1285+
failedHistogramCount++
12821286

12831287
if rollback := handleAppendFailure(err, hp.TimestampMs, ts.Labels, copiedLabels); !rollback {
12841288
continue
@@ -1290,7 +1294,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
12901294
return nil, wrapWithUser(err, userID)
12911295
}
12921296
} else {
1293-
nativeHistogramCount += len(ts.Histograms)
1297+
discardedNativeHistogramCount += len(ts.Histograms)
12941298
}
12951299

12961300
if i.cfg.ActiveSeriesMetricsEnabled && succeededSamplesCount > oldSucceededSamplesCount {
@@ -1300,6 +1304,13 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
13001304
})
13011305
}
13021306

1307+
if i.cfg.ActiveSeriesMetricsEnabled && succeededHistogramCount > oldSucceededHistogramsCount {
1308+
db.activeSeries.UpdateSeries(tsLabels, tsLabelsHash, startAppend, func(l labels.Labels) labels.Labels {
1309+
// we must already have copied the labels if succeededHistogramCount has been incremented.
1310+
return copiedLabels
1311+
})
1312+
}
1313+
13031314
maxExemplarsForUser := i.getMaxExemplars(userID)
13041315
if maxExemplarsForUser > 0 {
13051316
// app.AppendExemplar currently doesn't create the series, it must
@@ -1344,7 +1355,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
13441355
i.TSDBState.appenderCommitDuration.Observe(time.Since(startCommit).Seconds())
13451356

13461357
// If only invalid samples are pushed, don't change "last update", as TSDB was not modified.
1347-
if succeededSamplesCount > 0 {
1358+
if succeededSamplesCount > 0 || succeededHistogramCount > 0 {
13481359
db.setLastUpdate(time.Now())
13491360
}
13501361

@@ -1353,6 +1364,8 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
13531364
// which will be converted into an HTTP 5xx and the client should/will retry.
13541365
i.metrics.ingestedSamples.Add(float64(succeededSamplesCount))
13551366
i.metrics.ingestedSamplesFail.Add(float64(failedSamplesCount))
1367+
i.metrics.ingestedHistograms.Add(float64(succeededHistogramCount))
1368+
i.metrics.ingestedHistogramsFail.Add(float64(failedHistogramCount))
13561369
i.metrics.ingestedExemplars.Add(float64(succeededExemplarsCount))
13571370
i.metrics.ingestedExemplarsFail.Add(float64(failedExemplarsCount))
13581371

@@ -1378,20 +1391,20 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
13781391
i.validateMetrics.DiscardedSamples.WithLabelValues(perLabelsetSeriesLimit, userID).Add(float64(perLabelSetSeriesLimitCount))
13791392
}
13801393

1381-
if !i.cfg.BlocksStorageConfig.TSDB.EnableNativeHistograms && nativeHistogramCount > 0 {
1382-
i.validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramSample, userID).Add(float64(nativeHistogramCount))
1394+
if !i.cfg.BlocksStorageConfig.TSDB.EnableNativeHistograms && discardedNativeHistogramCount > 0 {
1395+
i.validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramSample, userID).Add(float64(discardedNativeHistogramCount))
13831396
}
13841397

13851398
// Distributor counts samples, metadata and histograms, so for consistency ingester does the same.
1386-
i.ingestionRate.Add(int64(succeededSamplesCount + ingestedMetadata))
1399+
i.ingestionRate.Add(int64(succeededSamplesCount + succeededHistogramCount + ingestedMetadata))
13871400

13881401
switch req.Source {
13891402
case cortexpb.RULE:
13901403
db.ingestedRuleSamples.Add(int64(succeededSamplesCount))
13911404
case cortexpb.API:
13921405
fallthrough
13931406
default:
1394-
db.ingestedAPISamples.Add(int64(succeededSamplesCount))
1407+
db.ingestedAPISamples.Add(int64(succeededSamplesCount + succeededHistogramCount))
13951408
}
13961409

13971410
if firstPartialErr != nil {
@@ -1400,7 +1413,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte
14001413
if errors.As(firstPartialErr, &ve) {
14011414
code = ve.code
14021415
}
1403-
level.Debug(logutil.WithContext(ctx, i.logger)).Log("msg", "partial failures to push", "totalSamples", succeededSamplesCount+failedSamplesCount, "failedSamples", failedSamplesCount, "firstPartialErr", firstPartialErr)
1416+
level.Debug(logutil.WithContext(ctx, i.logger)).Log("msg", "partial failures to push", "totalSamples", succeededSamplesCount+succeededHistogramCount+failedSamplesCount+failedHistogramCount, "failedSamples", failedSamplesCount, "failedHistogram", failedHistogramCount, "firstPartialErr", firstPartialErr)
14041417
return &cortexpb.WriteResponse{}, httpgrpc.Errorf(code, wrapWithUser(firstPartialErr, userID).Error())
14051418
}
14061419

pkg/ingester/ingester_test.go

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,13 @@ func TestIngester_Push(t *testing.T) {
976976
cortex_ingester_ingested_samples_total 1
977977
# HELP cortex_ingester_ingested_samples_failures_total The total number of samples that errored on ingestion.
978978
# TYPE cortex_ingester_ingested_samples_failures_total counter
979-
cortex_ingester_ingested_samples_failures_total 2
979+
cortex_ingester_ingested_samples_failures_total 1
980+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
981+
# TYPE cortex_ingester_ingested_native_histograms_total counter
982+
cortex_ingester_ingested_native_histograms_total 0
983+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
984+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
985+
cortex_ingester_ingested_native_histograms_failures_total 1
980986
# HELP cortex_ingester_memory_users The current number of users in memory.
981987
# TYPE cortex_ingester_memory_users gauge
982988
cortex_ingester_memory_users 1
@@ -1032,7 +1038,13 @@ func TestIngester_Push(t *testing.T) {
10321038
cortex_ingester_ingested_samples_total 1
10331039
# HELP cortex_ingester_ingested_samples_failures_total The total number of samples that errored on ingestion.
10341040
# TYPE cortex_ingester_ingested_samples_failures_total counter
1035-
cortex_ingester_ingested_samples_failures_total 2
1041+
cortex_ingester_ingested_samples_failures_total 1
1042+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
1043+
# TYPE cortex_ingester_ingested_native_histograms_total counter
1044+
cortex_ingester_ingested_native_histograms_total 1
1045+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
1046+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
1047+
cortex_ingester_ingested_native_histograms_failures_total 0
10361048
# HELP cortex_ingester_memory_users The current number of users in memory.
10371049
# TYPE cortex_ingester_memory_users gauge
10381050
cortex_ingester_memory_users 1
@@ -1290,12 +1302,18 @@ func TestIngester_Push(t *testing.T) {
12901302
"cortex_ingester_active_series",
12911303
},
12921304
expectedMetrics: `
1293-
# HELP cortex_ingester_ingested_samples_total The total number of samples ingested.
1294-
# TYPE cortex_ingester_ingested_samples_total counter
1295-
cortex_ingester_ingested_samples_total 1
12961305
# HELP cortex_ingester_ingested_samples_failures_total The total number of samples that errored on ingestion.
12971306
# TYPE cortex_ingester_ingested_samples_failures_total counter
12981307
cortex_ingester_ingested_samples_failures_total 0
1308+
# HELP cortex_ingester_ingested_samples_total The total number of samples ingested.
1309+
# TYPE cortex_ingester_ingested_samples_total counter
1310+
cortex_ingester_ingested_samples_total 0
1311+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
1312+
# TYPE cortex_ingester_ingested_native_histograms_total counter
1313+
cortex_ingester_ingested_native_histograms_total 1
1314+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
1315+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
1316+
cortex_ingester_ingested_native_histograms_failures_total 0
12991317
# HELP cortex_ingester_memory_users The current number of users in memory.
13001318
# TYPE cortex_ingester_memory_users gauge
13011319
cortex_ingester_memory_users 1
@@ -1342,10 +1360,16 @@ func TestIngester_Push(t *testing.T) {
13421360
expectedMetrics: `
13431361
# HELP cortex_ingester_ingested_samples_total The total number of samples ingested.
13441362
# TYPE cortex_ingester_ingested_samples_total counter
1345-
cortex_ingester_ingested_samples_total 1
1363+
cortex_ingester_ingested_samples_total 0
13461364
# HELP cortex_ingester_ingested_samples_failures_total The total number of samples that errored on ingestion.
13471365
# TYPE cortex_ingester_ingested_samples_failures_total counter
13481366
cortex_ingester_ingested_samples_failures_total 0
1367+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
1368+
# TYPE cortex_ingester_ingested_native_histograms_total counter
1369+
cortex_ingester_ingested_native_histograms_total 1
1370+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
1371+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
1372+
cortex_ingester_ingested_native_histograms_failures_total 0
13491373
# HELP cortex_ingester_memory_users The current number of users in memory.
13501374
# TYPE cortex_ingester_memory_users gauge
13511375
cortex_ingester_memory_users 1
@@ -1393,10 +1417,16 @@ func TestIngester_Push(t *testing.T) {
13931417
expectedMetrics: `
13941418
# HELP cortex_ingester_ingested_samples_total The total number of samples ingested.
13951419
# TYPE cortex_ingester_ingested_samples_total counter
1396-
cortex_ingester_ingested_samples_total 2
1420+
cortex_ingester_ingested_samples_total 1
13971421
# HELP cortex_ingester_ingested_samples_failures_total The total number of samples that errored on ingestion.
13981422
# TYPE cortex_ingester_ingested_samples_failures_total counter
13991423
cortex_ingester_ingested_samples_failures_total 0
1424+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
1425+
# TYPE cortex_ingester_ingested_native_histograms_total counter
1426+
cortex_ingester_ingested_native_histograms_total 1
1427+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
1428+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
1429+
cortex_ingester_ingested_native_histograms_failures_total 0
14001430
# HELP cortex_ingester_memory_users The current number of users in memory.
14011431
# TYPE cortex_ingester_memory_users gauge
14021432
cortex_ingester_memory_users 1
@@ -1684,7 +1714,7 @@ func TestIngester_PushNativeHistogramErrors(t *testing.T) {
16841714
_, err = i.Push(ctx, req)
16851715
assert.Equal(t, httpgrpc.Errorf(http.StatusBadRequest, wrapWithUser(wrappedTSDBIngestErr(tc.expectedErr, model.Time(10), metricLabelAdapters), userID).Error()), err)
16861716

1687-
require.Equal(t, testutil.ToFloat64(i.metrics.ingestedSamplesFail), float64(1))
1717+
require.Equal(t, testutil.ToFloat64(i.metrics.ingestedHistogramsFail), float64(1))
16881718
})
16891719
}
16901720
}

pkg/ingester/metrics.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ const (
2525

2626
type ingesterMetrics struct {
2727
ingestedSamples prometheus.Counter
28+
ingestedHistograms prometheus.Counter
2829
ingestedExemplars prometheus.Counter
2930
ingestedMetadata prometheus.Counter
3031
ingestedSamplesFail prometheus.Counter
32+
ingestedHistogramsFail prometheus.Counter
3133
ingestedExemplarsFail prometheus.Counter
3234
ingestedMetadataFail prometheus.Counter
3335
queries prometheus.Counter
@@ -80,6 +82,10 @@ func newIngesterMetrics(r prometheus.Registerer,
8082
Name: "cortex_ingester_ingested_samples_total",
8183
Help: "The total number of samples ingested.",
8284
}),
85+
ingestedHistograms: promauto.With(r).NewCounter(prometheus.CounterOpts{
86+
Name: "cortex_ingester_ingested_native_histograms_total",
87+
Help: "The total number of native histograms ingested.",
88+
}),
8389
ingestedExemplars: promauto.With(r).NewCounter(prometheus.CounterOpts{
8490
Name: "cortex_ingester_ingested_exemplars_total",
8591
Help: "The total number of exemplars ingested.",
@@ -92,6 +98,10 @@ func newIngesterMetrics(r prometheus.Registerer,
9298
Name: "cortex_ingester_ingested_samples_failures_total",
9399
Help: "The total number of samples that errored on ingestion.",
94100
}),
101+
ingestedHistogramsFail: promauto.With(r).NewCounter(prometheus.CounterOpts{
102+
Name: "cortex_ingester_ingested_native_histograms_failures_total",
103+
Help: "The total number of native histograms that errored on ingestion.",
104+
}),
95105
ingestedExemplarsFail: promauto.With(r).NewCounter(prometheus.CounterOpts{
96106
Name: "cortex_ingester_ingested_exemplars_failures_total",
97107
Help: "The total number of exemplars that errored on ingestion.",

pkg/ingester/metrics_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ func TestIngesterMetrics(t *testing.T) {
6464
# HELP cortex_ingester_ingested_samples_total The total number of samples ingested.
6565
# TYPE cortex_ingester_ingested_samples_total counter
6666
cortex_ingester_ingested_samples_total 0
67+
# HELP cortex_ingester_ingested_native_histograms_total The total number of native histograms ingested.
68+
# TYPE cortex_ingester_ingested_native_histograms_total counter
69+
cortex_ingester_ingested_native_histograms_total 0
70+
# HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion.
71+
# TYPE cortex_ingester_ingested_native_histograms_failures_total counter
72+
cortex_ingester_ingested_native_histograms_failures_total 0
6773
# HELP cortex_ingester_ingestion_rate_samples_per_second Current ingestion rate in samples/sec that ingester is using to limit access.
6874
# TYPE cortex_ingester_ingestion_rate_samples_per_second gauge
6975
cortex_ingester_ingestion_rate_samples_per_second 0

0 commit comments

Comments
 (0)