diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 0e1b47780e..234056c931 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -22,9 +22,5 @@ A clear and concise description of what you expected to happen. - Infrastructure: [e.g., Kubernetes, bare-metal, laptop] - Deployment tool: [e.g., helm, jsonnet] -**Storage Engine** -- [ ] Blocks -- [ ] Chunks - **Additional Context** diff --git a/CHANGELOG.md b/CHANGELOG.md index 61159e50f1..9574ea5619 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,34 @@ ## master / unreleased +**This release removes support for chunks storage. See below for more.** + +* [CHANGE] Remove support for chunks storage entirely. If you are using chunks storage on a previous version, you must [migrate your data](https://github.com/cortexproject/cortex/blob/v1.11.1/docs/blocks-storage/migrate-from-chunks-to-blocks.md) on version 1.12 or earlier. Before upgrading to this release, you should also remove any deprecated chunks-related configuration, as this release will no longer accept that. The following flags are gone: + - `-dynamodb.*` + - `-metrics.*` + - `-s3.*` + - `-azure.*` + - `-bigtable.*` + - `-gcs.*` + - `-cassandra.*` + - `-boltdb.*` + - `-local.*` + - some `-ingester` flags: + - `-ingester.wal-enabled` + - `-ingester.checkpoint-enabled` + - `-ingester.recover-from-wal` + - `-ingester.wal-dir` + - `-ingester.checkpoint-duration` + - `-ingester.flush-on-shutdown-with-wal-enabled` + - `-ingester.max-transfer-retries` + - `-ingester.max-samples-per-query` + - `-ingester.min-chunk-length` + - `-store.*` except `-store.engine` and `-store.max-query-length` + - `-store.query-chunk-limit` was deprecated and replaced by `-querier.max-fetched-chunks-per-query` + - `-deletes.*` + - `-grpc-store.*` + - `-flusher.wal-dir`, `-flusher.concurrent-flushes`, `-flusher.flush-op-timeout` +* [CHANGE] Remove support for alertmanager and ruler legacy store configuration. Before upgrading, you need to convert your configuration to use the `alertmanager-storage` and `ruler-storage` configuration on the version that you're already running, then upgrade. * [CHANGE] Changed default for `-ingester.min-ready-duration` from 1 minute to 15 seconds. #4539 * [CHANGE] query-frontend: Do not print anything in the logs of `query-frontend` if a in-progress query has been canceled (context canceled) to avoid spam. #4562 * [CHANGE] Compactor block deletion mark migration, needed when upgrading from v1.7, is now disabled by default. 
#4597 diff --git a/Makefile b/Makefile index 82e560d353..2b874fea47 100644 --- a/Makefile +++ b/Makefile @@ -96,15 +96,12 @@ pkg/frontend/v1/frontendv1pb/frontend.pb.go: pkg/frontend/v1/frontendv1pb/fronte pkg/frontend/v2/frontendv2pb/frontend.pb.go: pkg/frontend/v2/frontendv2pb/frontend.proto pkg/querier/queryrange/queryrange.pb.go: pkg/querier/queryrange/queryrange.proto pkg/querier/stats/stats.pb.go: pkg/querier/stats/stats.proto -pkg/chunk/storage/caching_index_client.pb.go: pkg/chunk/storage/caching_index_client.proto pkg/distributor/ha_tracker.pb.go: pkg/distributor/ha_tracker.proto pkg/ruler/rulespb/rules.pb.go: pkg/ruler/rulespb/rules.proto pkg/ruler/ruler.pb.go: pkg/ruler/ruler.proto pkg/ring/kv/memberlist/kv.pb.go: pkg/ring/kv/memberlist/kv.proto pkg/scheduler/schedulerpb/scheduler.pb.go: pkg/scheduler/schedulerpb/scheduler.proto pkg/storegateway/storegatewaypb/gateway.pb.go: pkg/storegateway/storegatewaypb/gateway.proto -pkg/chunk/grpc/grpc.pb.go: pkg/chunk/grpc/grpc.proto -tools/blocksconvert/scheduler.pb.go: tools/blocksconvert/scheduler.proto pkg/alertmanager/alertmanagerpb/alertmanager.pb.go: pkg/alertmanager/alertmanagerpb/alertmanager.proto pkg/alertmanager/alertspb/alerts.pb.go: pkg/alertmanager/alertspb/alerts.proto @@ -359,7 +356,7 @@ dist/$(UPTODATE)-packages: dist $(wildcard packaging/deb/**) $(wildcard packagin --before-remove packaging/deb/control/prerm \ --package dist/cortex-$(VERSION)_$$arch.deb \ dist/cortex-linux-$$arch=/usr/local/bin/cortex \ - docs/chunks-storage/single-process-config.yaml=/etc/cortex/single-process-config.yaml \ + docs/configuration/single-process-config-blocks.yaml=/etc/cortex/single-process-config.yaml \ packaging/deb/default/cortex=/etc/default/cortex \ packaging/deb/systemd/cortex.service=/etc/systemd/system/cortex.service; \ $(FPM_OPTS) -t rpm \ @@ -368,7 +365,7 @@ dist/$(UPTODATE)-packages: dist $(wildcard packaging/deb/**) $(wildcard packagin --before-remove packaging/rpm/control/preun \ --package dist/cortex-$(VERSION)_$$arch.rpm \ dist/cortex-linux-$$arch=/usr/local/bin/cortex \ - docs/chunks-storage/single-process-config.yaml=/etc/cortex/single-process-config.yaml \ + docs/configuration/single-process-config-blocks.yaml=/etc/cortex/single-process-config.yaml \ packaging/rpm/sysconfig/cortex=/etc/sysconfig/cortex \ packaging/rpm/systemd/cortex.service=/etc/systemd/system/cortex.service; \ done diff --git a/cmd/blocksconvert/Dockerfile b/cmd/blocksconvert/Dockerfile deleted file mode 100644 index 100ee9aa3b..0000000000 --- a/cmd/blocksconvert/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM alpine:3.14 -RUN apk add --no-cache ca-certificates -COPY blocksconvert / -ENTRYPOINT ["/blocksconvert"] - -ARG revision -LABEL org.opencontainers.image.title="blocksconvert" \ - org.opencontainers.image.source="https://github.com/cortexproject/cortex/tree/master/tools/blocksconvert" \ - org.opencontainers.image.revision="${revision}" diff --git a/cmd/blocksconvert/main.go b/cmd/blocksconvert/main.go deleted file mode 100644 index f3f2b8a5e0..0000000000 --- a/cmd/blocksconvert/main.go +++ /dev/null @@ -1,107 +0,0 @@ -package main - -import ( - "context" - "flag" - "fmt" - "os" - "strings" - - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/weaveworks/common/server" - "github.com/weaveworks/common/signals" - - "github.com/cortexproject/cortex/pkg/cortex" - util_log 
"github.com/cortexproject/cortex/pkg/util/log" - "github.com/cortexproject/cortex/pkg/util/services" - "github.com/cortexproject/cortex/tools/blocksconvert" - "github.com/cortexproject/cortex/tools/blocksconvert/builder" - "github.com/cortexproject/cortex/tools/blocksconvert/cleaner" - "github.com/cortexproject/cortex/tools/blocksconvert/scanner" - "github.com/cortexproject/cortex/tools/blocksconvert/scheduler" -) - -type Config struct { - Target string - ServerConfig server.Config - - SharedConfig blocksconvert.SharedConfig - ScannerConfig scanner.Config - BuilderConfig builder.Config - SchedulerConfig scheduler.Config - CleanerConfig cleaner.Config -} - -func main() { - cfg := Config{} - flag.StringVar(&cfg.Target, "target", "", "Module to run: Scanner, Scheduler, Builder") - cfg.SharedConfig.RegisterFlags(flag.CommandLine) - cfg.ScannerConfig.RegisterFlags(flag.CommandLine) - cfg.BuilderConfig.RegisterFlags(flag.CommandLine) - cfg.SchedulerConfig.RegisterFlags(flag.CommandLine) - cfg.CleanerConfig.RegisterFlags(flag.CommandLine) - cfg.ServerConfig.RegisterFlags(flag.CommandLine) - flag.Parse() - - util_log.InitLogger(&cfg.ServerConfig) - - cortex.DisableSignalHandling(&cfg.ServerConfig) - serv, err := server.New(cfg.ServerConfig) - if err != nil { - level.Error(util_log.Logger).Log("msg", "Unable to initialize server", "err", err.Error()) - os.Exit(1) - } - - cfg.Target = strings.ToLower(cfg.Target) - - registry := prometheus.DefaultRegisterer - - var targetService services.Service - switch cfg.Target { - case "scanner": - targetService, err = scanner.NewScanner(cfg.ScannerConfig, cfg.SharedConfig, util_log.Logger, registry) - case "builder": - targetService, err = builder.NewBuilder(cfg.BuilderConfig, cfg.SharedConfig, util_log.Logger, registry) - case "scheduler": - targetService, err = scheduler.NewScheduler(cfg.SchedulerConfig, cfg.SharedConfig, util_log.Logger, registry, serv.HTTP, serv.GRPC) - case "cleaner": - targetService, err = cleaner.NewCleaner(cfg.CleanerConfig, cfg.SharedConfig, util_log.Logger, registry) - default: - err = fmt.Errorf("unknown target") - } - - if err != nil { - level.Error(util_log.Logger).Log("msg", "failed to initialize", "err", err) - os.Exit(1) - } - - servService := cortex.NewServerService(serv, func() []services.Service { - return []services.Service{targetService} - }) - servManager, err := services.NewManager(servService, targetService) - if err == nil { - servManager.AddListener(services.NewManagerListener(nil, nil, func(service services.Service) { - servManager.StopAsync() - })) - - err = services.StartManagerAndAwaitHealthy(context.Background(), servManager) - } - if err != nil { - level.Error(util_log.Logger).Log("msg", "Unable to start", "err", err.Error()) - os.Exit(1) - } - - // Setup signal handler and ask service maanger to stop when signal arrives. - handler := signals.NewHandler(serv.Log) - go func() { - handler.Loop() - servManager.StopAsync() - }() - - // We only wait for target service. 
If any other service fails, listener will stop it (via manager) - if err := targetService.AwaitTerminated(context.Background()); err != nil { - level.Error(util_log.Logger).Log("msg", cfg.Target+" failed", "err", targetService.FailureCase()) - os.Exit(1) - } -} diff --git a/docs/api/_index.md b/docs/api/_index.md index c20975ec95..32dce280eb 100644 --- a/docs/api/_index.md +++ b/docs/api/_index.md @@ -28,7 +28,7 @@ For the sake of clarity, in this document we have grouped API endpoints by servi | [Remote write](#remote-write) | Distributor | `POST /api/v1/push` | | [Tenants stats](#tenants-stats) | Distributor | `GET /distributor/all_user_stats` | | [HA tracker status](#ha-tracker-status) | Distributor | `GET /distributor/ha_tracker` | -| [Flush chunks / blocks](#flush-chunks--blocks) | Ingester | `GET,POST /ingester/flush` | +| [Flush blocks](#flush-blocks) | Ingester | `GET,POST /ingester/flush` | | [Shutdown](#shutdown) | Ingester | `GET,POST /ingester/shutdown` | | [Ingesters ring status](#ingesters-ring-status) | Ingester | `GET /ingester/ring` | | [Instant query](#instant-query) | Querier, Query-frontend | `GET,POST /api/v1/query` | @@ -40,7 +40,6 @@ For the sake of clarity, in this document we have grouped API endpoints by servi | [Get metric metadata](#get-metric-metadata) | Querier, Query-frontend | `GET /api/v1/metadata` | | [Remote read](#remote-read) | Querier, Query-frontend | `POST /api/v1/read` | | [Get tenant ingestion stats](#get-tenant-ingestion-stats) | Querier | `GET /api/v1/user_stats` | -| [Get tenant chunks](#get-tenant-chunks) | Querier | `GET /api/v1/chunks` | | [Ruler ring status](#ruler-ring-status) | Ruler | `GET /ruler/ring` | | [Ruler rules ](#ruler-rule-groups) | Ruler | `GET /ruler/rule_groups` | | [List rules](#list-rules) | Ruler | `GET /api/v1/rules` | @@ -60,9 +59,6 @@ For the sake of clarity, in this document we have grouped API endpoints by servi | [Get Alertmanager configuration](#get-alertmanager-configuration) | Alertmanager | `GET /api/v1/alerts` | | [Set Alertmanager configuration](#set-alertmanager-configuration) | Alertmanager | `POST /api/v1/alerts` | | [Delete Alertmanager configuration](#delete-alertmanager-configuration) | Alertmanager | `DELETE /api/v1/alerts` | -| [Delete series](#delete-series) | Purger | `PUT,POST /api/v1/admin/tsdb/delete_series` | -| [List delete requests](#list-delete-requests) | Purger | `GET /api/v1/admin/tsdb/delete_series` | -| [Cancel delete request](#cancel-delete-request) | Purger | `PUT,POST /api/v1/admin/tsdb/cancel_delete_request` | | [Tenant delete request](#tenant-delete-request) | Purger | `POST /purger/delete_tenant` | | [Tenant delete status](#tenant-delete-status) | Purger | `GET /purger/delete_tenant_status` | | [Store-gateway ring status](#store-gateway-ring-status) | Store-gateway | `GET /store-gateway/ring` | @@ -247,7 +243,7 @@ Displays a web page with the current status of the HA tracker, including the ele ## Ingester -### Flush chunks / blocks +### Flush blocks ``` GET,POST /ingester/flush @@ -256,11 +252,11 @@ GET,POST /ingester/flush GET,POST /flush ``` -Triggers a flush of the in-memory time series data (chunks or blocks) to the long-term storage. This endpoint triggers the flush also when `-ingester.flush-on-shutdown-with-wal-enabled` or `-blocks-storage.tsdb.flush-blocks-on-shutdown` are disabled. +Triggers a flush of the in-memory time series data to the long-term storage. 
This endpoint triggers the flush even when `-blocks-storage.tsdb.flush-blocks-on-shutdown` is disabled. -When using blocks storage, this endpoint accepts `tenant` parameter to specify tenant whose blocks are compacted and shipped. This parameter may be specified multiple times to select more tenants. If no tenant is specified, all tenants are flushed. +This endpoint accepts a `tenant` parameter to specify the tenant whose blocks should be compacted and shipped. This parameter may be specified multiple times to select more tenants. If no tenant is specified, all tenants are flushed. -Flush endpoint now also accepts `wait=true` parameter, which makes the call synchronous – it will only return after flushing has finished. Note that returned status code does not reflect the result of flush operation. This parameter is only available when using blocks storage. +The flush endpoint also accepts a `wait=true` parameter, which makes the call synchronous – it only returns after flushing has finished. Note that the returned status code does not reflect the result of the flush operation. ### Shutdown @@ -427,25 +423,6 @@ Returns realtime ingestion rate, for the authenticated tenant, in `JSON` format. _Requires [authentication](#authentication)._ -### Get tenant chunks - -``` -GET /api/v1/chunks - -# Legacy -GET /chunks -``` - -Fetch a compressed tar of all the chunks containing samples for the given time range and label matchers. This endpoint is supported only by the **chunks storage**, requires `-querier.ingester-streaming=true` and should **not be exposed to users** but just used for debugging purposes. - -| URL query parameter | Description | -| ------------------- | ----------- | -| `start` | Start timestamp, in RFC3339 format or unix epoch. | -| `end` | End timestamp, in RFC3339 format or unix epoch. | -| `matcher` | Label matcher that selects the series for which chunks should be fetched. | - -_Requires [authentication](#authentication)._ - ## Ruler The ruler API endpoints require to configure a backend object storage to store the recording rules and alerts. The ruler API uses the concept of a "namespace" when creating rule groups. This is a stand in for the name of the rule file in Prometheus and rule groups must be named uniquely within a namespace. @@ -810,52 +787,7 @@ _Requires [authentication](#authentication)._ ## Purger -The Purger service provides APIs for requesting deletion of series in chunks storage and managing delete requests. For more information about it, please read the [Delete series Guide](../guides/deleting-series.md). - -### Delete series - -``` -PUT,POST /api/v1/admin/tsdb/delete_series - -# Legacy -PUT,POST /api/v1/admin/tsdb/delete_series -``` - -Prometheus-compatible delete series endpoint. - -_For more information, please check out the Prometheus [delete series](https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series) documentation._ - -_Requires [authentication](#authentication)._ - -### List delete requests - -``` -GET /api/v1/admin/tsdb/delete_series - -# Legacy -GET /api/v1/admin/tsdb/delete_series -``` - -List all the delete requests.
- -_Requires [authentication](#authentication)._ - -### Cancel delete request - -``` -PUT,POST /api/v1/admin/tsdb/cancel_delete_request - -# Legacy -PUT,POST /api/v1/admin/tsdb/cancel_delete_request -``` - -Cancel a delete request while the request is still in the grace period (before the request is effectively processed by the purger and time series data is hard-deleted from the storage). - -| URL query parameter | Description | - | ------------------- | ----------- | -| `request_id` | Deletion request ID to cancel. Can be obtained by the [List delete requests](#list-delete-requests) endpoint. | - -_Requires [authentication](#authentication)._ +The Purger service provides APIs for requesting deletion of tenants. ### Tenant Delete Request diff --git a/docs/architecture.md b/docs/architecture.md index 2249e29c25..4114170fbd 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -19,32 +19,7 @@ Incoming samples (writes from Prometheus) are handled by the [distributor](#dist ## Storage -Cortex currently supports two storage engines to store and query the time series: - -- Chunks (deprecated) -- Blocks - -The two engines mostly share the same Cortex architecture with few differences outlined in the rest of the document. - -### Chunks storage (deprecated) - -The chunks storage stores each single time series into a separate object called _Chunk_. Each Chunk contains the samples for a given period (defaults to 12 hours). Chunks are then indexed by time range and labels, in order to provide a fast lookup across many (over millions) Chunks. - -For this reason, the chunks storage consists of: - -* An index for the Chunks. This index can be backed by: - * [Amazon DynamoDB](https://aws.amazon.com/dynamodb) - * [Google Bigtable](https://cloud.google.com/bigtable) - * [Apache Cassandra](https://cassandra.apache.org) -* An object store for the Chunk data itself, which can be: - * [Amazon DynamoDB](https://aws.amazon.com/dynamodb) - * [Google Bigtable](https://cloud.google.com/bigtable) - * [Apache Cassandra](https://cassandra.apache.org) - * [Amazon S3](https://aws.amazon.com/s3) - * [Google Cloud Storage](https://cloud.google.com/storage/) - * [Microsoft Azure Storage](https://azure.microsoft.com/en-us/services/storage/) - -For more information, please check out the [Chunks storage](./chunks-storage/_index.md) documentation. +Cortex currently supports a single storage engine, `blocks`, to store and query time series. Support for the previous `chunks` storage engine has been removed. ### Blocks storage @@ -151,14 +126,14 @@ We recommend randomly load balancing write requests across distributor instances The **ingester** service is responsible for writing incoming series to a [long-term storage backend](#storage) on the write path and returning in-memory series samples for queries on the read path. -Incoming series are not immediately written to the storage but kept in memory and periodically flushed to the storage (by default, 12 hours for the chunks storage and 2 hours for the blocks storage). For this reason, the [queriers](#querier) may need to fetch samples both from ingesters and long-term storage while executing a query on the read path. +Incoming series are not immediately written to the storage but kept in memory and periodically flushed to the storage (by default, every 2 hours). For this reason, the [queriers](#querier) may need to fetch samples both from ingesters and long-term storage while executing a query on the read path.
Ingesters contain a **lifecycler** which manages the lifecycle of an ingester and stores the **ingester state** in the [hash ring](#the-hash-ring). Each ingester could be in one of the following states: - **`PENDING`**
- The ingester has just started. While in this state, the ingester doesn't receive neither write and read requests, and could be waiting for time series data transfer from another ingester if running the chunks storage and the [hand-over](guides/ingesters-rolling-updates.md#chunks-storage-with-wal-disabled-hand-over) is enabled. + The ingester has just started. While in this state, the ingester doesn't receive either write or read requests. - **`JOINING`**
- The ingester is starting up and joining the ring. While in this state the ingester doesn't receive neither write and read requests. The ingester will join the ring using tokens received by a leaving ingester as part of the [hand-over](guides/ingesters-rolling-updates.md#chunks-storage-with-wal-disabled-hand-over) process (if enabled), otherwise it could load tokens from disk (if `-ingester.tokens-file-path` is configured) or generate a set of new random ones. Finally, the ingester optionally observes the ring for tokens conflicts and then, once any conflict is resolved, will move to `ACTIVE` state. + The ingester is starting up and joining the ring. While in this state the ingester doesn't receive either write or read requests. The ingester will join the ring using tokens loaded from disk (if `-ingester.tokens-file-path` is configured), or it will generate a set of new random ones. Finally, the ingester optionally observes the ring for token conflicts and then, once any conflict is resolved, moves to the `ACTIVE` state. - **`ACTIVE`**
The ingester is up and running. While in this state the ingester can receive both write and read requests. - **`LEAVING`**
@@ -166,8 +141,6 @@ Ingesters contain a **lifecycler** which manages the lifecycle of an ingester an - **`UNHEALTHY`**
The ingester has failed to heartbeat to the ring's KV Store. While in this state, distributors skip the ingester while building the replication set for incoming series and the ingester does not receive write or read requests. -_The ingester states are internally used for different purposes, including the series hand-over process supported by the chunks storage. For more information about it, please check out the [Ingester hand-over](guides/ingesters-rolling-updates.md#chunks-storage-with-wal-disabled-hand-over) documentation._ - Ingesters are **semi-stateful**. #### Ingesters failure and data loss @@ -183,8 +156,6 @@ The **write-ahead log** (WAL) is used to write to a persistent disk all incoming Contrary to the sole replication and given the persistent disk data is not lost, in the event of multiple ingesters failure each ingester will recover the in-memory series samples from WAL upon subsequent restart. The replication is still recommended in order to ensure no temporary failures on the read path in the event of a single ingester failure. -The WAL for the chunks storage is disabled by default, while it's always enabled for the blocks storage. - #### Ingesters write de-amplification Ingesters store recently received samples in-memory in order to perform write de-amplification. If the ingesters would immediately write received samples to the long-term storage, the system would be very difficult to scale due to the very high pressure on the storage. For this reason, the ingesters batch and compress samples in-memory and periodically flush them out to the storage. diff --git a/docs/blocks-storage/_index.md b/docs/blocks-storage/_index.md index 97e5997b5d..32296fba15 100644 --- a/docs/blocks-storage/_index.md +++ b/docs/blocks-storage/_index.md @@ -33,8 +33,6 @@ The **[compactor](./compactor.md)** is responsible to merge and deduplicate smal The `alertmanager` and `ruler` components can also use object storage to store its configurations and rules uploaded by users. In that case a separate bucket should be created to store alertmanager configurations and rules: using the same bucket between ruler/alertmanager and blocks will cause issue with the **[compactor](./compactor.md)**. -Finally, the [**table-manager**](../chunks-storage/table-manager.md) and the [**schema config**](../chunks-storage/schema-config.md) are **not used** by the blocks storage. - ### The write path **Ingesters** receive incoming samples from the distributors. Each push request belongs to a tenant, and the ingester appends the received samples to the specific per-tenant TSDB stored on the local disk. The received samples are both kept in-memory and written to a write-ahead log (WAL) and used to recover the in-memory series in case the ingester abruptly terminates. The per-tenant TSDB is lazily created in each ingester as soon as the first samples are received for that tenant. @@ -47,7 +45,7 @@ In order to effectively use the **WAL** and being able to recover the in-memory The series sharding and replication done by the distributor doesn't change based on the storage engine. -It's important to note that - differently than the [chunks storage](../chunks-storage/_index.md) - due to the replication factor N (typically 3), each time series is stored by N ingesters. Since each ingester writes its own block to the long-term storage, this leads a storage utilization N times more than the chunks storage. 
[Compactor](./compactor.md) solves this problem by merging blocks from multiple ingesters into a single block, and removing duplicated samples. After blocks compaction, the storage utilization is significantly smaller compared to the chunks storage for the same exact series and samples. +It's important to note that due to the replication factor N (typically 3), each time series is stored by N ingesters. Since each ingester writes its own block to the long-term storage, this leads to a storage utilization N times higher. [Compactor](./compactor.md) solves this problem by merging blocks from multiple ingesters into a single block, and removing duplicated samples. After blocks compaction, the storage utilization is significantly smaller. For more information, please refer to the following dedicated sections: diff --git a/docs/blocks-storage/convert-stored-chunks-to-blocks.md b/docs/blocks-storage/convert-stored-chunks-to-blocks.md deleted file mode 100644 index fc6f98e16a..0000000000 --- a/docs/blocks-storage/convert-stored-chunks-to-blocks.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: "Convert long-term storage from chunks to blocks" -linkTitle: "Convert long-term storage from chunks to blocks" -weight: 6 -slug: convert-long-term-storage-from-chunks-to-blocks ---- - -If you have [configured your cluster to write new data to blocks](./migrate-from-chunks-to-blocks.md), there is still a question about old data. -Cortex can query both chunks and the blocks at the same time, but converting old chunks to blocks still has some benefits, like being able to decommission the chunks storage backend and save costs. -This document presents set of tools for doing the conversion. - -_[Original design document](https://docs.google.com/document/d/1VI0cgaJmHD0pcrRb3UV04f8szXXGmFKQyqUJnFOcf6Q/edit?usp=sharing) for `blocksconvert` is also available._ - -## Tools - -Cortex provides a tool called `blocksconvert`, which is actually collection of three tools for converting chunks to blocks. - -Tools are: - -- [**Scanner**](#scanner)
- Scans the chunks index database and produces so-called "plan files", each file being a set of series and chunks for each series. Plan files are uploaded to the same object store bucket where blocks live. -- [**Scheduler**](#scheduler)
- Looks for plan files, and distributes them to builders. Scheduler has global view of overall conversion progress. -- [**Builder**](#builder)
- Asks scheduler for next plan file to work on, fetches chunks, puts them into TSDB block, and uploads the block to the object store. It repeats this process until there are no more plans. -- [**Cleaner**](#cleaner)
- Cleaner asks scheduler for next plan file to work on, but instead of building the block, it actually **REMOVES CHUNKS** and **INDEX ENTRIES** from the Index database. - -All tools start HTTP server (see `-server.http*` options) exposing the `/metrics` endpoint. -All tools also start gRPC server (`-server.grpc*` options), but only Scheduler exposes services on it. - -### Scanner - -Scanner is started by running `blocksconvert -target=scanner`. Scanner requires configuration for accessing Cortex Index: - -- `-schema-config-file` – this is standard Cortex schema file. -- `-bigtable.instance`, `-bigtable.project` – options for BigTable access. -- `-dynamodb.url` - for DynamoDB access. Example `dynamodb://us-east-1/` -- `-blocks-storage.backend` and corresponding `-blocks-storage.*` options for storing plan files. -- `-scanner.output-dir` – specifies local directory for writing plan files to. Finished plan files are deleted after upload to the bucket. List of scanned tables is also kept in this directory, to avoid scanning the same tables multiple times when Scanner is restarted. -- `-scanner.allowed-users` – comma-separated list of Cortex tenants that should have plans generated. If empty, plans for all found users are generated. -- `-scanner.ignore-users-regex` - If plans for all users are generated (`-scanner.allowed-users` is not set), then users matching this non-empty regular expression will be skipped. -- `-scanner.tables-limit` – How many tables should be scanned? By default all tables are scanned, but when testing scanner it may be useful to start with small number of tables first. -- `-scanner.tables` – Comma-separated list of tables to be scanned. Can be used to scan specific tables only. Note that schema is still used to find all tables first, and then this list is consulted to select only specified tables. -- `-scanner.scan-period-start` & `-scanner.scan-period-end` - limit the scan to a particular date range (format like `2020-12-31`) - -Scanner will read the Cortex schema file to discover Index tables, and then it will start scanning them from most-recent table first, going back. -For each table, it will fully read the table and generate a plan for each user and day stored in the table. -Plan files are then uploaded to the configured blocks-storage bucket (at the `-blocksconvert.bucket-prefix` location prefix), and local copies are deleted. -After that, scanner continues with the next table until it scans them all or `-scanner.tables-limit` is reached. - -Note that even though `blocksconvert` has options for configuring different Index store backends, **it only supports BigTable and DynamoDB at the moment.** - -It is expected that only single Scanner process is running. -Scanner does the scanning of multiple table subranges concurrently. - -Scanner exposes metrics with `cortex_blocksconvert_scanner_` prefix, eg. total number of scanned index entries of different type, number of open files (scanner doesn't close currently plan files until entire table has been scanned), scanned rows and parsed index entries. - -**Scanner only supports schema version v9 on DynamoDB; v9, v10 and v11 on BigTable. Earlier schema versions are currently not supported.** - -### Scheduler - -Scheduler is started by running `blocksconvert -target=scheduler`. It only needs to be configured with options to access the object store with blocks: - -- `-blocks-storage.*` - Blocks storage object store configuration. -- `-scheduler.scan-interval` – How often to scan for plan files and their status. 
-- `-scheduler.allowed-users` – Comma-separated list of Cortex tenants. If set, only plans for these tenants will be offered to Builders. - -It is expected that only single Scheduler process is running. Schedulers consume very little resources. - -Scheduler's metrics have `cortex_blocksconvert_scheduler` prefix (number of plans in different states, oldest/newest plan). -Scheduler HTTP server also exposes `/plans` page that shows currently queued plans, and all plans and their status for all users. - -### Builder - -Builder asks scheduler for next plan to work on, downloads the plan, builds the block and uploads the block to the blocks storage. It then repeats the process while there are still plans. - -Builder is started by `blocksconvert -target=builder`. It needs to be configured with Scheduler endpoint, Cortex schema file, chunk-store specific options and blocks storage to upload blocks to. - -- `-builder.scheduler-endpoint` - where to find scheduler, eg. "scheduler:9095" -- `-schema-config-file` - Cortex schema file, used to find out which chunks store to use for given plan -- `-gcs.bucketname` – when using GCS as chunks store (other chunks backend storages, like S3, are supported as well) -- `-blocks-storage.*` - blocks storage configuration -- `-builder.output-dir` - Local directory where Builder keeps the block while it is being built. Once block is uploaded to blocks storage, it is deleted from local directory. - -Multiple builders may run at the same time, each builder will receive different plan to work on from scheduler. -Builders are CPU intensive (decoding and merging chunks), and require fast disk IO for writing blocks. - -Builders's metrics have `cortex_blocksconvert_builder` prefix, and include total number of fetched chunks and their size, read position of the current plan and plan size, total number of written series and samples, number of chunks that couldn't be downloaded. - -### Cleaner - -Cleaner is similar to builder in that it asks scheduler for next plan to work on, but instead of building blocks, it actually **REMOVES CHUNKS and INDEX ENTRIES**. Use with caution. - -Cleaner is started by using `blocksconvert -target=cleaner`. Like Builder, it needs Scheduler endpoint, Cortex schema file, index and chunk-store specific options. Note that Cleaner works with any index store supported by Cortex, not just BigTable. - -- `-cleaner.scheduler-endpoint` – where to find scheduler -- `-blocks-storage.*` – blocks storage configuration, used for downloading plan files -- `-cleaner.plans-dir` – local directory to store plan file while it is being processed by Cleaner. -- `-schema-config-file` – Cortex schema file. - -Cleaner doesn't **scan** for index entries, but uses existing plan files to find chunks and index entries. For each series, Cleaner needs to download at least one chunk. This is because plan file doesn't contain label names and values, but chunks do. Cleaner will then delete all index entries associated with the series, and also all chunks. - -**WARNING:** If both Builder and Cleaner run at the same time and use use the same Scheduler, **some plans will be handled by builder, and some by cleaner!** This will result in a loss of data! - -Cleaner should only be deployed if no other Builder is running. Running multiple Cleaners at once is not supported, and will result in leftover chunks and index entries. Reason for this is that chunks can span multiple days, and chunk is fully deleted only when processing plan (day) when chunk started. 
Since cleaner also needs to download some chunks to be able to clean up all index entries, when using multiple cleaners, it can happen that cleaner processing older plans will delete chunks required to properly clean up data in newer plans. When using single cleaner only, this is not a problem, since scheduler sends plans to cleaner in time-reversed order. - -**Note:** Cleaner is designed for use in very special cases, eg. when deleting chunks and index entries for a specific customer. If `blocksconvert` was used to convert ALL chunks to blocks, it is simpler to just drop the index and chunks database afterwards. In such case, Cleaner is not needed. - -### Limitations - -The `blocksconvert` toolset currently has the following limitations: - -- Scanner supports only BigTable and DynamoDB for chunks index backend, and cannot currently scan other databases. -- Supports only chunks schema versions v9 for DynamoDB; v9, v10 and v11 for Bigtable. diff --git a/docs/blocks-storage/migrate-from-chunks-to-blocks.md b/docs/blocks-storage/migrate-from-chunks-to-blocks.md deleted file mode 100644 index 3ebc122d06..0000000000 --- a/docs/blocks-storage/migrate-from-chunks-to-blocks.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -title: "Migrate Cortex cluster from chunks to blocks" -linkTitle: "Migrate Cortex cluster from chunks to blocks" -weight: 5 -slug: migrate-cortex-cluster-from-chunks-to-blocks ---- - -This article describes how to migrate existing Cortex cluster from chunks storage to blocks storage, -and highlight possible issues you may encounter in the process. - -_This document replaces the [Cortex proposal](https://cortexmetrics.io/docs/proposals/ingesters-migration/), -which was written before support for migration was in place._ - -## Introduction - -This article **assumes** that: - -- Cortex cluster is managed by Kubernetes -- Cortex is using chunks storage -- Ingesters are using WAL -- Cortex version 1.4.0 or later. - -_If your ingesters are not using WAL, the documented procedure will still apply, but the presented migration script will not work properly without changes, as it assumes that ingesters are managed via StatefulSet._ - -The migration procedure is composed by 3 steps: - -1. [Preparation](#step-1-preparation) -1. [Ingesters migration](#step-2-ingesters-migration) -1. [Cleanup](#step-3-cleanup) - -_In case of any issue during or after the migration, this document also outlines a [Rollback](#rollback) strategy._ - -## Step 1: Preparation - -Before starting the migration of ingesters, we need to prepare other services. - -### Querier and Ruler - -_Everything discussed for querier applies to ruler as well, since it shares querier configuration – CLI flags prefix is `-querier` even when used by ruler._ - -Querier and ruler need to be reconfigured as follow: - -- `-querier.second-store-engine=blocks` -- `-querier.query-store-after=0` - -#### `-querier.second-store-engine=blocks` - -Querier (and ruler) needs to be reconfigured to query both chunks storage and blocks storage at the same time. -This is achieved by using `-querier.second-store-engine=blocks` option, and providing querier with full blocks configuration, but keeping "primary" store set to `-store.engine=chunks`. - -#### `-querier.query-store-after=0` - -Querier (and ruler) has an option `-querier.query-store-after` to query store only if query hits data older than some period of time. -For example, if ingesters keep 12h of data in memory, there is no need to hit the store for queries that only need last 1h of data. 
-During the migration, this flag needs to be set to 0, to make queriers always consult the store when handling queries. -As chunks ingesters shut down, they flush chunks to the storage. They are then replaced with new ingesters configured -to use blocks. Queriers cannot fetch recent chunks from ingesters directly (as blocks ingester don't reload chunks), -and need to use storage instead. - -### Query-frontend - -Query-frontend needs to be reconfigured as follow: - -- `-querier.parallelise-shardable-queries=false` - -#### `-querier.parallelise-shardable-queries=false` - -Query frontend has an option `-querier.parallelise-shardable-queries` to split some incoming queries into multiple queries based on sharding factor used in v11 schema of chunk storage. -As the description implies, it only works when using chunks storage. -During and after the migration to blocks (and also after possible rollback), this option needs to be disabled otherwise query-frontend will generate queries that cannot be satisfied by blocks storage. - -### Compactor and Store-gateway - -[Compactor](./compactor.md) and [store-gateway](./store-gateway.md) services should be deployed and successfully up and running before migrating ingesters. - -### Ingester – blocks - -Migration script presented in Step 2 assumes that there are two StatefulSets of ingesters: existing one configured with chunks, and the new one with blocks. -New StatefulSet with blocks ingesters should have 0 replicas at the beginning of migration. - -### Table-Manager - chunks - -If you use a store with provisioned IO, e.g. DynamoDB, scale up the provision before starting the migration. -Each ingester will need to flush all chunks before exiting, so will write to the store at many times the normal rate. - -Stop or reconfigure the table-manager to stop it adjusting the provision back to normal. -(Don't do the migration on Wednesday night when a new weekly table might be required.) - -## Step 2: Ingesters migration - -We have developed a script available in Cortex [`tools/migrate-ingester-statefulsets.sh`](https://github.com/cortexproject/cortex/blob/master/tools/migrate-ingester-statefulsets.sh) to migrate ingesters between two StatefulSets, shutting down ingesters one by one. - -It can be used like this: - -``` -$ tools/migrate-ingester-statefulsets.sh -``` - -Where parameters are: -- ``: Kubernetes namespace where the Cortex cluster is running -- ``: name of the ingesters StatefulSet to scale down (running chunks storage) -- ``: name of the ingesters StatefulSet to scale up (running blocks storage) -- ``: number of instances to scale down (in `ingester-old` statefulset) and scale up (in `ingester-new`), or "all" – which will scale down all remaining instances in `ingester-old` statefulset - -After starting new pod in `ingester-new` statefulset, script then triggers `/shutdown` endpoint on the old ingester. When the flushing on the old ingester is complete, scale down of statefulset continues, and process repeats. - -_The script supports both migration from chunks to blocks, and viceversa (eg. rollback)._ - -### Known issues - -There are few known issues with the script: - -- If expected messages don't appear in the log, but pod keeps on running, the script will never finish. -- Script doesn't verify that flush finished without any error. - -## Step 3: Cleanup - -When the ingesters migration finishes, there are still two StatefulSets, with original StatefulSet (running the chunks storage) having 0 instances now. 
- -At this point, we can delete the old StatefulSet and its persistent volumes and recreate it with final blocks storage configuration (eg. changing PVs), and use the script again to move pods from `ingester-blocks` to `ingester`. - -Querier (and ruler) can be reconfigured to use `blocks` as "primary" store to search, and `chunks` as secondary: - -- `-store.engine=blocks` -- `-querier.second-store-engine=chunks` -- `-querier.use-second-store-before-time=` -- `-querier.ingester-streaming=true` - -#### `-querier.use-second-store-before-time` - -The CLI flag `-querier.use-second-store-before-time` (or its respective YAML config option) is only available for secondary store. -This flag can be set to a timestamp when migration has finished, and it avoids querying secondary store (chunks) for data when running queries that don't need data before given time. - -Both primary and secondary stores are queried before this time, so the overlap where some data is in chunks and some in blocks is covered. - -## Rollback - -If rollback to chunks is needed for any reason, it is possible to use the same migration script with reversed arguments: - -- Scale down ingesters StatefulSet running blocks storage -- Scale up ingesters StatefulSet running chunks storage - -_Blocks ingesters support the same `/shutdown` endpoint for flushing data._ - -During the rollback, queriers and rulers need to use the same configuration changes as during migration. You should also make sure the following settings are applied: - -- `-store.engine=chunks` -- `-querier.second-store-engine=blocks` -- `-querier.use-second-store-before-time` should not be set -- `-querier.ingester-streaming=false` - -Once the rollback is complete, some configuration changes need to stay in place, because some data has already been stored to blocks: - -- The query sharding in the query-frontend must be kept disabled, otherwise querying blocks will not work correctly -- `store-gateway` needs to keep running, otherwise querying blocks will fail -- `compactor` may be shutdown, after it has no more compaction work to do - -Kubernetes resources related to the ingesters running the blocks storage may be deleted. - -### Known issues - -After rollback, chunks ingesters will replay their old Write-Ahead-Log, thus loading old chunks into memory. -WAL doesn't remember whether these old chunks were already flushed or not, so they will be flushed again to the storage. -Until that flush happens, Cortex reports those chunks as unflushed, which may trigger some alerts based on `cortex_oldest_unflushed_chunk_timestamp_seconds` metric. - -## Appendix - -### Jsonnet config - -This section shows how to use [cortex-jsonnet](https://github.com/grafana/cortex-jsonnet) to configure additional services. - -We will assume that `main.jsonnet` is main configuration for the cluster, that also imports `temp.jsonnet` – with our temporary configuration for migration. - -In `main.jsonnet` we have something like this: - -```jsonnet -local cortex = import 'cortex/cortex.libsonnet'; -local wal = import 'cortex/wal.libsonnet'; -local temp = import 'temp.jsonnet'; - -// Note that 'tsdb' is not imported here. -cortex + wal + temp { - _images+:: (import 'images.libsonnet'), - - _config+:: { - cluster: 'k8s-cluster', - namespace: 'k8s-namespace', - -... 
-``` - -To configure querier to use secondary store for querying, we need to add: - -``` - querier_second_storage_engine: 'blocks', - blocks_storage_bucket_name: 'bucket-for-storing-blocks', -``` - -to the `_config` object in main.jsonnet. - -Let's generate blocks configuration now in `temp.jsonnet`. -There are comments inside that should give you an idea about what's happening. -Most important thing is generating resources with blocks configuration, and exposing some of them. - - -```jsonnet -{ - local cortex = import 'cortex/cortex.libsonnet', - local tsdb = import 'cortex/tsdb.libsonnet', - local rootConfig = self._config, - local statefulSet = $.apps.v1beta1.statefulSet, - - // Prepare TSDB resources, but hide them. Cherry-picked resources will be exposed later. - tsdb_config:: cortex + tsdb + { - _config+:: { - cluster: rootConfig.cluster, - namespace: rootConfig.namespace, - external_url: rootConfig.external_url, - - // This Cortex cluster is using the blocks storage. - storage_tsdb_bucket_name: rootConfig.storage_tsdb_bucket_name, - cortex_store_gateway_data_disk_size: '100Gi', - cortex_compactor_data_disk_class: 'fast', - }, - - // We create another statefulset for ingesters here, with different name. - ingester_blocks_statefulset: self.newIngesterStatefulSet('ingester-blocks', self.ingester_container) + - statefulSet.mixin.spec.withReplicas(0), - - ingester_blocks_pdb: self.newIngesterPdb('ingester-blocks-pdb', 'ingester-blocks'), - ingester_blocks_service: $.util.serviceFor(self.ingester_blocks_statefulset, self.ingester_service_ignored_labels), - }, - - _config+: { - queryFrontend+: { - // Disabled because querying blocks-data breaks if query is rewritten for sharding. - sharded_queries_enabled: false, - }, - }, - - // Expose some services from TSDB configuration, needed for running Querier with Chunks as primary and TSDB as secondary store. - tsdb_store_gateway_pdb: self.tsdb_config.store_gateway_pdb, - tsdb_store_gateway_service: self.tsdb_config.store_gateway_service, - tsdb_store_gateway_statefulset: self.tsdb_config.store_gateway_statefulset, - - tsdb_memcached_metadata: self.tsdb_config.memcached_metadata, - - tsdb_ingester_statefulset: self.tsdb_config.ingester_blocks_statefulset, - tsdb_ingester_pdb: self.tsdb_config.ingester_blocks_pdb, - tsdb_ingester_service: self.tsdb_config.ingester_blocks_service, - - tsdb_compactor_statefulset: self.tsdb_config.compactor_statefulset, - - // Querier and ruler configuration used during migration, and after. - query_config_during_migration:: { - // Disable streaming, as it is broken when querying both chunks and blocks ingesters at the same time. - 'querier.ingester-streaming': 'false', - - // query-store-after is required during migration, since new ingesters running on blocks will not load any chunks from chunks-WAL. - // All such chunks are however flushed to the store. - 'querier.query-store-after': '0', - }, - - query_config_after_migration:: { - 'querier.ingester-streaming': 'true', - 'querier.query-ingesters-within': '13h', // TSDB ingesters have data for up to 4d. - 'querier.query-store-after': '12h', // Can be enabled once blocks ingesters are running for 12h. - - // Switch TSDB and chunks. TSDB is "primary" now so that we can skip querying chunks for old queries. - // We can do this, because querier/ruler have both configurations. 
- 'store.engine': 'blocks', - 'querier.second-store-engine': 'chunks', - - 'querier.use-second-store-before-time': '2020-07-28T17:00:00Z', // If migration from chunks finished around 18:10 CEST, no need to query chunk store for queries before this time. - }, - - querier_args+:: self.tsdb_config.blocks_metadata_caching_config + self.query_config_during_migration, // + self.query_config_after_migration, - ruler_args+:: self.tsdb_config.blocks_metadata_caching_config + self.query_config_during_migration, // + self.query_config_after_migration, -} -``` diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index 90b57ce34d..1abd93e2f2 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -7,7 +7,7 @@ slug: querier -The **querier** service handles queries using the [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) query language. The querier service is used both by the chunks and blocks storage, and the [general architecture documentation](../architecture.md#querier) applies to the blocks storage too, except for the differences described in this document. +The **querier** service handles queries using the [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) query language. This document dives into the storage-specific details of the querier service. The [general architecture documentation](../architecture.md#querier) applies too. The querier is **stateless**. @@ -203,15 +203,6 @@ querier: # CLI flag: -querier.store-gateway-client.tls-insecure-skip-verify [tls_insecure_skip_verify: | default = false] - # Second store engine to use for querying. Empty = disabled. - # CLI flag: -querier.second-store-engine - [second_store_engine: | default = ""] - - # If specified, second store is only used for queries before this timestamp. - # Default value 0 means secondary store is always queried. - # CLI flag: -querier.use-second-store-before-time - [use_second_store_before_time: