-
Notifications
You must be signed in to change notification settings - Fork 816
Cache older index entries #1130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package chunk | ||
|
||
import ( | ||
"time" | ||
|
||
"github.com/prometheus/common/model" | ||
"github.com/weaveworks/common/mtime" | ||
) | ||
|
||
// schemaCaching wraps a Schema and splits every read into an "old" part,
// whose index entries are stable and can be cached as immutable, and a
// "recent" part that must always be re-read.
type schemaCaching struct {
	Schema

	// cacheOlderThan is the age beyond which index entries are considered
	// immutable: queries for ranges ending earlier than now-cacheOlderThan
	// are flagged Immutable so results may be cached indefinitely.
	cacheOlderThan time.Duration
}
|
||
func (s *schemaCaching) GetReadQueriesForMetric(from, through model.Time, userID string, metricName model.LabelValue) ([]IndexQuery, error) { | ||
cFrom, cThrough, from, through := splitTimesByCacheability(from, through, model.TimeFromUnix(mtime.Now().Add(-s.cacheOlderThan).Unix())) | ||
|
||
cacheableQueries, err := s.Schema.GetReadQueriesForMetric(cFrom, cThrough, userID, metricName) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
activeQueries, err := s.Schema.GetReadQueriesForMetric(from, through, userID, metricName) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return mergeCacheableAndActiveQueries(cacheableQueries, activeQueries), nil | ||
} | ||
|
||
func (s *schemaCaching) GetReadQueriesForMetricLabel(from, through model.Time, userID string, metricName model.LabelValue, labelName model.LabelName) ([]IndexQuery, error) { | ||
cFrom, cThrough, from, through := splitTimesByCacheability(from, through, model.TimeFromUnix(mtime.Now().Add(-s.cacheOlderThan).Unix())) | ||
|
||
cacheableQueries, err := s.Schema.GetReadQueriesForMetricLabel(cFrom, cThrough, userID, metricName, labelName) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
activeQueries, err := s.Schema.GetReadQueriesForMetricLabel(from, through, userID, metricName, labelName) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return mergeCacheableAndActiveQueries(cacheableQueries, activeQueries), nil | ||
} | ||
|
||
func (s *schemaCaching) GetReadQueriesForMetricLabelValue(from, through model.Time, userID string, metricName model.LabelValue, labelName model.LabelName, labelValue model.LabelValue) ([]IndexQuery, error) { | ||
cFrom, cThrough, from, through := splitTimesByCacheability(from, through, model.TimeFromUnix(mtime.Now().Add(-s.cacheOlderThan).Unix())) | ||
|
||
cacheableQueries, err := s.Schema.GetReadQueriesForMetricLabelValue(cFrom, cThrough, userID, metricName, labelName, labelValue) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
activeQueries, err := s.Schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, labelName, labelValue) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return mergeCacheableAndActiveQueries(cacheableQueries, activeQueries), nil | ||
} | ||
|
||
// If the query resulted in series IDs, use this method to find chunks. | ||
func (s *schemaCaching) GetChunksForSeries(from, through model.Time, userID string, seriesID []byte) ([]IndexQuery, error) { | ||
cFrom, cThrough, from, through := splitTimesByCacheability(from, through, model.TimeFromUnix(mtime.Now().Add(-s.cacheOlderThan).Unix())) | ||
|
||
cacheableQueries, err := s.Schema.GetChunksForSeries(cFrom, cThrough, userID, seriesID) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
activeQueries, err := s.Schema.GetChunksForSeries(from, through, userID, seriesID) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return mergeCacheableAndActiveQueries(cacheableQueries, activeQueries), nil | ||
} | ||
|
||
func splitTimesByCacheability(from, through model.Time, cacheBefore model.Time) (model.Time, model.Time, model.Time, model.Time) { | ||
if from.After(cacheBefore) { | ||
return 0, 0, from, through | ||
} | ||
|
||
if through.Before(cacheBefore) { | ||
return from, through, 0, 0 | ||
} | ||
|
||
return from, cacheBefore, cacheBefore, through | ||
} | ||
|
||
func mergeCacheableAndActiveQueries(cacheableQueries []IndexQuery, activeQueries []IndexQuery) []IndexQuery { | ||
finalQueries := make([]IndexQuery, 0, len(cacheableQueries)+len(activeQueries)) | ||
|
||
Outer: | ||
for _, cq := range cacheableQueries { | ||
for _, aq := range activeQueries { | ||
// When deduping, the bucket values only influence TableName and HashValue | ||
// and just checking those is enough. | ||
if cq.TableName == aq.TableName && cq.HashValue == aq.HashValue { | ||
continue Outer | ||
} | ||
} | ||
|
||
cq.Immutable = true | ||
finalQueries = append(finalQueries, cq) | ||
} | ||
|
||
finalQueries = append(finalQueries, activeQueries...) | ||
|
||
return finalQueries | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package chunk | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/prometheus/common/model" | ||
"github.com/stretchr/testify/require" | ||
"github.com/weaveworks/common/mtime" | ||
) | ||
|
||
func TestCachingSchema(t *testing.T) { | ||
const ( | ||
userID = "userid" | ||
periodicPrefix = "periodicPrefix" | ||
) | ||
|
||
dailyBuckets := makeSchema("v3") | ||
schema := &schemaCaching{ | ||
Schema: dailyBuckets, | ||
cacheOlderThan: 24 * time.Hour, | ||
} | ||
|
||
baseTime := time.Unix(0, 0) | ||
baseTime = baseTime.Add(30*24*time.Hour - 1) | ||
|
||
mtime.NowForce(baseTime) | ||
|
||
for _, tc := range []struct { | ||
from, through time.Time | ||
|
||
cacheableIdx int | ||
}{ | ||
{ | ||
// Completely cacheable. | ||
baseTime.Add(-36 * time.Hour), | ||
baseTime.Add(-25 * time.Hour), | ||
0, | ||
}, | ||
{ | ||
// Completely active. | ||
baseTime.Add(-23 * time.Hour), | ||
baseTime.Add(-2 * time.Hour), | ||
-1, | ||
}, | ||
{ | ||
// Mix of both but the cacheable entry is also active. | ||
baseTime.Add(-36 * time.Hour), | ||
baseTime.Add(-2 * time.Hour), | ||
-1, | ||
}, | ||
{ | ||
// Mix of both. | ||
baseTime.Add(-50 * time.Hour), | ||
baseTime.Add(-2 * time.Hour), | ||
0, | ||
}, | ||
} { | ||
have, err := schema.GetReadQueriesForMetric( | ||
model.TimeFromUnix(tc.from.Unix()), model.TimeFromUnix(tc.through.Unix()), | ||
userID, model.LabelValue("foo"), | ||
) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
for i := range have { | ||
if i <= tc.cacheableIdx { | ||
require.True(t, have[i].Immutable) | ||
} else { | ||
require.False(t, have[i].Immutable) | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,10 +46,10 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { | |
|
||
// Deprecated flags!! | ||
f.IntVar(&cfg.IndexCacheSize, "store.index-cache-size", 0, "Deprecated: Use -store.index-cache-read.*; Size of in-memory index cache, 0 to disable.") | ||
f.DurationVar(&cfg.IndexCacheValidity, "store.index-cache-validity", 5*time.Minute, "Deprecated: Use -store.index-cache-read.*; Period for which entries in the index cache are valid. Should be no higher than -ingester.max-chunk-idle.") | ||
cfg.memcacheClient.RegisterFlagsWithPrefix("index.", "Deprecated: Use -store.index-cache-read.*;", f) | ||
|
||
cfg.indexQueriesCacheConfig.RegisterFlagsWithPrefix("store.index-cache-read.", "Cache config for index entry reading. ", f) | ||
f.DurationVar(&cfg.IndexCacheValidity, "store.index-cache-validity", 5*time.Minute, "Cache validity for active index entries. Should be no higher than -ingester.max-chunk-idle.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this is no longer used? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is how long we want to cache the active entries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't see where it is used either There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is being used here: https://github.com/cortexproject/cortex/pull/1130/files#diff-d479a87a51735dca31797a0bc4af42caL95 to set the valid duration for caching mutable entries. |
||
} | ||
|
||
// NewStore makes the storage clients based on the configuration. | ||
|
@@ -59,7 +59,7 @@ func NewStore(cfg Config, storeCfg chunk.StoreConfig, schemaCfg chunk.SchemaConf | |
// Building up from deprecated flags. | ||
var caches []cache.Cache | ||
if cfg.IndexCacheSize > 0 { | ||
fifocache := cache.Instrument("fifo-index", cache.NewFifoCache("index", cache.FifoCacheConfig{Size: cfg.IndexCacheSize, Validity: cfg.IndexCacheValidity})) | ||
fifocache := cache.Instrument("fifo-index", cache.NewFifoCache("index", cache.FifoCacheConfig{Size: cfg.IndexCacheSize})) | ||
caches = append(caches, fifocache) | ||
} | ||
if cfg.memcacheClient.Host != "" { | ||
|
@@ -76,7 +76,6 @@ func NewStore(cfg Config, storeCfg chunk.StoreConfig, schemaCfg chunk.SchemaConf | |
var tieredCache cache.Cache | ||
if len(caches) > 0 { | ||
tieredCache = cache.NewTiered(caches) | ||
cfg.indexQueriesCacheConfig.DefaultValidity = cfg.IndexCacheValidity | ||
} else { | ||
tieredCache, err = cache.New(cfg.indexQueriesCacheConfig) | ||
if err != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this really valuable in the presence of the caching front-end which will shard by day?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really, if using the frontend, but the frontend is an optional component, I guess?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Plus I'd say yes it is - the caching frontend only matches exact queries; this will match individual labels, which is useful across multiple different queries.