roystchiang
diff --git a/‎CHANGELOG.md
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/blocks-storage/querier.md
Lines changed: 15 additions & 0 deletions b/‎docs/blocks-storage/querier.md
Lines changed: 15 additions & 0 deletions
diff --git a/‎docs/blocks-storage/store-gateway.md
Lines changed: 15 additions & 0 deletions b/‎docs/blocks-storage/store-gateway.md
Lines changed: 15 additions & 0 deletions
diff --git a/‎docs/configuration/arguments.md
Lines changed: 2 additions & 0 deletions b/‎docs/configuration/arguments.md
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/configuration/config-file-reference.md
Lines changed: 15 additions & 0 deletions b/‎docs/configuration/config-file-reference.md
Lines changed: 15 additions & 0 deletions
diff --git a/‎docs/proposals/parallel-compaction.md
Lines changed: 4 additions & 2 deletions b/‎docs/proposals/parallel-compaction.md
Lines changed: 4 additions & 2 deletions
diff --git a/‎go.mod
Lines changed: 5 additions & 5 deletions b/‎go.mod
Lines changed: 5 additions & 5 deletions
@@ -41,6 +41,7 @@
 * [ENHANCEMENT] Exemplars are now emitted for all gRPC calls and many operations tracked by histograms. #4462
 * [ENHANCEMENT] New options `-server.http-listen-network` and `-server.grpc-listen-network` allow binding as 'tcp4' or 'tcp6'. #4462
 * [ENHANCEMENT] Rulers: Using shuffle sharding subring on GetRules API. #4466
+* [ENHANCEMENT] Support memcached auto-discovery via `auto-discovery` flag, introduced by thanos in https://github.com/thanos-io/thanos/pull/4487. Both AWS and Google Cloud memcached service support auto-discovery, which returns a list of nodes of the memcached cluster. #4412
 * [BUGFIX] Fixes a panic in the query-tee when comparing result. #4465
 * [BUGFIX] Frontend: Fixes @ modifier functions (start/end) when splitting queries by time. #4464
 * [BUGFIX] Compactor: compactor will no longer try to compact blocks that are already marked for deletion. Previously compactor would consider blocks marked for deletion within `-compactor.deletion-delay / 2` period as eligible for compaction. #4328
@@ -53,6 +54,9 @@
 * [BUGFIX] Memberlist: forward only changes, not entire original message. #4419
 * [BUGFIX] Memberlist: don't accept old tombstones as incoming change, and don't forward such messages to other gossip members. #4420
 * [BUGFIX] Querier: fixed panic when querying exemplars and using `-distributor.shard-by-all-labels=false`. #4473
+* [BUGFIX] Querier: honor querier minT,maxT if `nil` SelectHints are passed to Select(). #4413
+* [BUGFIX] Compactor: fixed panic while collecting Prometheus metrics. #4483
+
 
 ## 1.10.0 / 2021-08-03
 
 
@@ -512,6 +512,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
     chunks_cache:
       # Backend for chunks cache, if not empty. Supported values: memcached.
       # CLI flag: -blocks-storage.bucket-store.chunks-cache.backend
@@ -559,6 +564,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
       # Size of each subrange that bucket object is split into for better
       # caching.
       # CLI flag: -blocks-storage.bucket-store.chunks-cache.subrange-size
@@ -625,6 +635,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
       # How long to cache list of tenants in the bucket.
       # CLI flag: -blocks-storage.bucket-store.metadata-cache.tenants-list-ttl
       [tenants_list_ttl: <duration> | default = 15m]
 
@@ -576,6 +576,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
     chunks_cache:
       # Backend for chunks cache, if not empty. Supported values: memcached.
       # CLI flag: -blocks-storage.bucket-store.chunks-cache.backend
@@ -623,6 +628,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
       # Size of each subrange that bucket object is split into for better
       # caching.
       # CLI flag: -blocks-storage.bucket-store.chunks-cache.subrange-size
@@ -689,6 +699,11 @@ blocks_storage:
         # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size
         [max_item_size: <int> | default = 1048576]
 
+        # Use memcached auto-discovery mechanism provided by some cloud provider
+        # like GCP and AWS
+        # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.auto-discovery
+        [auto_discovery: <boolean> | default = false]
+
       # How long to cache list of tenants in the bucket.
       # CLI flag: -blocks-storage.bucket-store.metadata-cache.tenants-list-ttl
       [tenants_list_ttl: <duration> | default = 15m]
 
@@ -533,6 +533,8 @@ The DNS service discovery, inspired from Thanos DNS SD, supports different disco
 
 If **no prefix** is provided, the provided IP or hostname will be used straightaway without pre-resolving it.
 
+If you are using a managed memcached service from [Google Cloud](https://cloud.google.com/memorystore/docs/memcached/auto-discovery-overview), or [AWS](https://docs.aws.amazon.com/AmazonElastiCache/latest/mem-ug/AutoDiscovery.HowAutoDiscoveryWorks.html), use the [auto-discovery](./config-file-reference.md#memcached-client-config) flag instead of DNS discovery, then use the discovery/configuration endpoint as the domain name without any prefix.
+
 ## Logging of IP of reverse proxy
 
 If a reverse proxy is used in front of Cortex it might be diffult to troubleshoot errors. The following 3 settings can be used to log the IP address passed along by the reverse proxy in headers like X-Forwarded-For.
 
@@ -4765,6 +4765,11 @@ bucket_store:
       # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.max-item-size
       [max_item_size: <int> | default = 1048576]
 
+      # Use memcached auto-discovery mechanism provided by some cloud provider
+      # like GCP and AWS
+      # CLI flag: -blocks-storage.bucket-store.index-cache.memcached.auto-discovery
+      [auto_discovery: <boolean> | default = false]
+
   chunks_cache:
     # Backend for chunks cache, if not empty. Supported values: memcached.
     # CLI flag: -blocks-storage.bucket-store.chunks-cache.backend
@@ -4812,6 +4817,11 @@ bucket_store:
       # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size
       [max_item_size: <int> | default = 1048576]
 
+      # Use memcached auto-discovery mechanism provided by some cloud provider
+      # like GCP and AWS
+      # CLI flag: -blocks-storage.bucket-store.chunks-cache.memcached.auto-discovery
+      [auto_discovery: <boolean> | default = false]
+
     # Size of each subrange that bucket object is split into for better caching.
     # CLI flag: -blocks-storage.bucket-store.chunks-cache.subrange-size
     [subrange_size: <int> | default = 16000]
@@ -4877,6 +4887,11 @@ bucket_store:
       # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size
       [max_item_size: <int> | default = 1048576]
 
+      # Use memcached auto-discovery mechanism provided by some cloud provider
+      # like GCP and AWS
+      # CLI flag: -blocks-storage.bucket-store.metadata-cache.memcached.auto-discovery
+      [auto_discovery: <boolean> | default = false]
+
     # How long to cache list of tenants in the bucket.
     # CLI flag: -blocks-storage.bucket-store.metadata-cache.tenants-list-ttl
     [tenants_list_ttl: <duration> | default = 15m]
 
@@ -11,7 +11,9 @@ slug: parallel-compaction
 ---
 
 ## Introduction
-As a part of pushing Cortex’s scaling capability at AWS, we have done performance testing with Cortex and found the compactor to be one of the main limiting factors for higher active timeseries limit per tenant. The documentation [Compactor](https://cortexmetrics.io/docs/blocks-storage/compactor/#how-compaction-works) describes the responsibilities of a compactor, and this proposal focuses on the limitations of the current compactor architecture. In the current architecture, compactor has simple sharding, meaning that a single tenant is sharded to a single compactor. In addition, a compactor handles compaction groups of a single tenant iteratively, meaning that blocks belonging non-overlapping times are not compacted in parallel.
+As a part of pushing Cortex’s scaling capability at AWS, we have done performance testing with Cortex and found the compactor to be one of the main limiting factors for higher active timeseries limit per tenant. The documentation [Compactor](https://cortexmetrics.io/docs/blocks-storage/compactor/#how-compaction-works) describes the responsibilities of a compactor, and this proposal focuses on the limitations of the current compactor architecture. In the current architecture, compactor has simple sharding, meaning that a single tenant is sharded to a single compactor. The compactor generates compaction groups, which are groups of Prometheus TSDB blocks that can be compacted together, independently of another group. However, a compactor currnetly handles compaction groups of a single tenant iteratively, meaning that blocks belonging non-overlapping times are not compacted in parallel.
+
+Cortex ingesters are responsible for uploading TSDB blocks with data emitted by a tenant. These blocks are considered as level-1 blocks, as they contain duplicate timeseries for the same time interval, depending on the replication factor. [Vertical compaction](https://cortexmetrics.io/docs/blocks-storage/compactor/#how-compaction-works) is done to merge all the blocks with the same time interval and deduplicate the samples. These merged blocks are level-2 blocks. Subsequent compactions such as horizontal compaction can happen, further increasing the compaction level of the blocks.
 
 ### Problem and Requirements
 Currently, a compactor is able to compact up to 20M timeseries within 2 hours for a level-2 compaction, including the time to download blocks, compact, and upload the newly compacted block. We would like to increase the timeseries limit per tenant, and compaction is one of the limiting factors. In addition, we would like to achieve the following:
@@ -42,7 +44,7 @@ The benefit of this approach is that this aligns with what Cortex currently does
 
 ### Bad block resulting in non-ideal compaction groups
 
-A Cortex operator configures the compaction block range. Using 2h and 6h as example, [2h-1] [2h-2] [2h-3] [2h-4] [2h-5] [2h-6]. If the [2h-1] block is corrupted, we may compact the subsequent [2h-2] [2h-3] [2h-4] [2h-5] [2h-6] blocks. To compact into a 6 hour group, the ideal compaction is [2h-1] [2h-2] [2h-3] and [2h-4] [2h-5] [2h-6]. The cortex planner needs to know the ideal compaction interval, and prevent compaction of [2h-2] [2h-3] [2h-4] from happening, which will result in [2h-1] not able to be compacted into longer time interval blocks. Cortex has full information regarding all the available blocks, so we should utilize this information to achieve the best compaction interval.
+A Cortex operator configures the compaction block range as 2h and 6h. If a full 6-hour block cannot be compacted due to compaction failures, the compactor should not split up the group into subgroups, as this may cause suboptimal grouping of block. Cortex has full information regarding all the available blocks, so we should utilize this information to achieve the best compaction group possible.
 
 ## Alternatives
 
 
@@ -11,7 +11,7 @@ require (
 	github.com/NYTimes/gziphandler v1.1.1
 	github.com/alecthomas/units v0.0.0-20210208195552-ff826a37aa15
 	github.com/alicebob/miniredis/v2 v2.14.3
-	github.com/aws/aws-sdk-go v1.38.68
+	github.com/aws/aws-sdk-go v1.40.11
 	github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b
 	github.com/cespare/xxhash v1.1.0
 	github.com/dustin/go-humanize v1.0.0
@@ -41,21 +41,21 @@ require (
 	github.com/opentracing-contrib/go-stdlib v1.0.0
 	github.com/opentracing/opentracing-go v1.2.0
 	github.com/pkg/errors v0.9.1
-	github.com/prometheus/alertmanager v0.22.3-0.20210726110322-3d86bd709df8
+	github.com/prometheus/alertmanager v0.23.1-0.20210914172521-e35efbddb66a
 	github.com/prometheus/client_golang v1.11.0
 	github.com/prometheus/client_model v0.2.0
-	github.com/prometheus/common v0.29.0
+	github.com/prometheus/common v0.30.0
 	github.com/prometheus/prometheus v1.8.2-0.20210720123808-b1ed4a0a663d
 	github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
 	github.com/sony/gobreaker v0.4.1
 	github.com/spf13/afero v1.2.2
 	github.com/stretchr/testify v1.7.0
-	github.com/thanos-io/thanos v0.22.0
+	github.com/thanos-io/thanos v0.19.1-0.20210803192524-baea4ce9ef52
 	github.com/uber/jaeger-client-go v2.29.1+incompatible
 	github.com/weaveworks/common v0.0.0-20210901124008-1fa3f9fa874c
 	go.etcd.io/bbolt v1.3.6
 	go.uber.org/atomic v1.9.0
-	golang.org/x/net v0.0.0-20210610132358-84b48f89b13b
+	golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985
 	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 	golang.org/x/time v0.0.0-20210611083556-38a9dc6acbc6
 	google.golang.org/api v0.50.0