Skip to content

Commit 0ec7b96

Browse files
authored
Use newer AWS API for paginated queries (#2452)
* Refactor: export NextDelay() so we can call it from other packages Signed-off-by: Bryan Boreham <[email protected]> * Refactor: use newer AWS API for paginated queries This is less code, and more robust when retrying requests. We don't need an indirection on the request object for testing now. Signed-off-by: Bryan Boreham <[email protected]> * Don't need DynamoDB request wrapper to do so much now Now we are calling QueryPagesWithContext directly we don't need the paging interface and we never re-use request objects. Signed-off-by: Bryan Boreham <[email protected]> * Stop AWS Retryer if context is cancelled Ingester.flushUserSeries() puts a timeout on the context, so don't retry for longer than that. Signed-off-by: Bryan Boreham <[email protected]>
1 parent b44dc01 commit 0ec7b96

File tree

7 files changed

+89
-95
lines changed

7 files changed

+89
-95
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
* [ENHANCEMENT] Redis Cache: Added `idle_timeout`, `wait_on_pool_exhaustion` and `max_conn_lifetime` options to redis cache configuration. #2550
5757
* [ENHANCEMENT] WAL: the experimental tag has been removed on the WAL in ingesters.
5858
* [BUGFIX] Ruler: Ensure temporary rule files with special characters are properly mapped and cleaned up. #2506
59+
* [ENHANCEMENT] Use newer AWS API for paginated queries - removes 'Deprecated' message from logfiles. #2452
5960
* [BUGFIX] Fixes #2411, Ensure requests are properly routed to the prometheus api embedded in the query if `-server.path-prefix` is set. #2372
6061
* [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400
6162
* [BUGFIX] Cassandra Storage: Fix endpoint TLS host verification. #2109

pkg/chunk/aws/dynamodb_storage_client.go

Lines changed: 29 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/go-kit/kit/log/level"
1414
ot "github.com/opentracing/opentracing-go"
15+
otlog "github.com/opentracing/opentracing-go/log"
1516
"golang.org/x/time/rate"
1617

1718
"github.com/aws/aws-sdk-go/aws"
@@ -21,6 +22,7 @@ import (
2122
"github.com/aws/aws-sdk-go/aws/session"
2223
"github.com/aws/aws-sdk-go/service/dynamodb"
2324
"github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface"
25+
"github.com/pkg/errors"
2426
"github.com/prometheus/client_golang/prometheus"
2527
awscommon "github.com/weaveworks/common/aws"
2628
"github.com/weaveworks/common/instrument"
@@ -144,7 +146,6 @@ type dynamoDBStorageClient struct {
144146

145147
// These functions exists for mocking, so we don't have to write a whole load
146148
// of boilerplate.
147-
queryRequestFn func(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest
148149
batchGetItemRequestFn func(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest
149150
batchWriteItemRequestFn func(ctx context.Context, input *dynamodb.BatchWriteItemInput) dynamoDBRequest
150151
}
@@ -172,7 +173,6 @@ func newDynamoDBStorageClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig)
172173
DynamoDB: dynamoDB,
173174
writeThrottle: rate.NewLimiter(rate.Limit(cfg.ThrottleLimit), dynamoDBMaxWriteBatchSize),
174175
}
175-
client.queryRequestFn = client.queryRequest
176176
client.batchGetItemRequestFn = client.batchGetItemRequest
177177
client.batchWriteItemRequestFn = client.batchWriteItemRequest
178178
return client, nil
@@ -327,88 +327,44 @@ func (a dynamoDBStorageClient) query(ctx context.Context, query chunk.IndexQuery
327327
}
328328
}
329329

330-
request := a.queryRequestFn(ctx, input)
331330
pageCount := 0
332331
defer func() {
333332
dynamoQueryPagesCount.Observe(float64(pageCount))
334333
}()
335334

336-
for page := request; page != nil; page = page.NextPage() {
337-
pageCount++
338-
339-
response, err := a.queryPage(ctx, input, page, query.HashValue, pageCount)
340-
if err != nil {
341-
return err
342-
}
343-
344-
if !callback(query, response) {
345-
if err != nil {
346-
return fmt.Errorf("QueryPages error: table=%v, err=%v", *input.TableName, page.Error())
347-
}
348-
return nil
349-
}
350-
if !page.HasNextPage() {
351-
return nil
335+
retryer := newRetryer(ctx, a.cfg.backoffConfig)
336+
err := instrument.CollectedRequest(ctx, "DynamoDB.QueryPages", dynamoRequestDuration, instrument.ErrorCode, func(innerCtx context.Context) error {
337+
if sp := ot.SpanFromContext(innerCtx); sp != nil {
338+
sp.SetTag("tableName", query.TableName)
339+
sp.SetTag("hashValue", query.HashValue)
352340
}
353-
}
354-
return nil
355-
}
356-
357-
func (a dynamoDBStorageClient) queryPage(ctx context.Context, input *dynamodb.QueryInput, page dynamoDBRequest, hashValue string, pageCount int) (*dynamoDBReadResponse, error) {
358-
backoff := util.NewBackoff(ctx, a.cfg.backoffConfig)
359-
360-
var err error
361-
for backoff.Ongoing() {
362-
err = instrument.CollectedRequest(ctx, "DynamoDB.QueryPages", dynamoRequestDuration, instrument.ErrorCode, func(innerCtx context.Context) error {
341+
return a.DynamoDB.QueryPagesWithContext(innerCtx, input, func(output *dynamodb.QueryOutput, _ bool) bool {
342+
pageCount++
363343
if sp := ot.SpanFromContext(innerCtx); sp != nil {
364-
sp.SetTag("tableName", aws.StringValue(input.TableName))
365-
sp.SetTag("hashValue", hashValue)
366-
sp.SetTag("page", pageCount)
367-
sp.SetTag("retry", backoff.NumRetries())
344+
sp.LogFields(otlog.Int("page", pageCount))
368345
}
369-
return page.Send()
370-
})
371-
372-
if cc := page.Data().(*dynamodb.QueryOutput).ConsumedCapacity; cc != nil {
373-
dynamoConsumedCapacity.WithLabelValues("DynamoDB.QueryPages", *cc.TableName).
374-
Add(float64(*cc.CapacityUnits))
375-
}
376346

377-
if err != nil {
378-
recordDynamoError(*input.TableName, err, "DynamoDB.QueryPages")
379-
if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || page.Retryable()) {
380-
if awsErr.Code() != dynamodb.ErrCodeProvisionedThroughputExceededException {
381-
level.Warn(util.Logger).Log("msg", "DynamoDB error", "retry", backoff.NumRetries(), "table", *input.TableName, "err", err)
382-
}
383-
backoff.Wait()
384-
continue
347+
if cc := output.ConsumedCapacity; cc != nil {
348+
dynamoConsumedCapacity.WithLabelValues("DynamoDB.QueryPages", *cc.TableName).
349+
Add(float64(*cc.CapacityUnits))
385350
}
386-
return nil, fmt.Errorf("QueryPage error: table=%v, err=%v", *input.TableName, err)
387-
}
388351

389-
queryOutput := page.Data().(*dynamodb.QueryOutput)
390-
return &dynamoDBReadResponse{
391-
items: queryOutput.Items,
392-
}, nil
352+
return callback(query, &dynamoDBReadResponse{items: output.Items})
353+
}, retryer.withRetries, withErrorHandler(query.TableName, "DynamoDB.QueryPages"))
354+
})
355+
if err != nil {
356+
return errors.Wrapf(err, "QueryPages error: table=%v", query.TableName)
393357
}
394-
return nil, fmt.Errorf("QueryPage error: %s for table %v, last error %v", backoff.Err(), *input.TableName, err)
358+
return err
395359
}
396360

397361
type dynamoDBRequest interface {
398-
NextPage() dynamoDBRequest
399362
Send() error
400363
Data() interface{}
401364
Error() error
402-
HasNextPage() bool
403365
Retryable() bool
404366
}
405367

406-
func (a dynamoDBStorageClient) queryRequest(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest {
407-
req, _ := a.DynamoDB.QueryRequest(input)
408-
req.SetContext(ctx)
409-
return dynamoDBRequestAdapter{req}
410-
}
411-
412368
func (a dynamoDBStorageClient) batchGetItemRequest(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest {
413369
req, _ := a.DynamoDB.BatchGetItemRequest(input)
414370
req.SetContext(ctx)
@@ -425,33 +381,18 @@ type dynamoDBRequestAdapter struct {
425381
request *request.Request
426382
}
427383

428-
func (a dynamoDBRequestAdapter) NextPage() dynamoDBRequest {
429-
next := a.request.NextPage()
430-
if next == nil {
431-
return nil
432-
}
433-
return dynamoDBRequestAdapter{next}
434-
}
435-
436384
func (a dynamoDBRequestAdapter) Data() interface{} {
437385
return a.request.Data
438386
}
439387

440388
func (a dynamoDBRequestAdapter) Send() error {
441-
// Clear error in case we are retrying the same operation - if we
442-
// don't do this then the same error will come back again immediately
443-
a.request.Error = nil
444389
return a.request.Send()
445390
}
446391

447392
func (a dynamoDBRequestAdapter) Error() error {
448393
return a.request.Error
449394
}
450395

451-
func (a dynamoDBRequestAdapter) HasNextPage() bool {
452-
return a.request.HasNextPage()
453-
}
454-
455396
func (a dynamoDBRequestAdapter) Retryable() bool {
456397
return aws.BoolValue(a.request.Retryable)
457398
}
@@ -840,6 +781,16 @@ func (b dynamoDBReadRequest) TakeReqs(from dynamoDBReadRequest, max int) {
840781
}
841782
}
842783

784+
func withErrorHandler(tableName, operation string) func(req *request.Request) {
785+
return func(req *request.Request) {
786+
req.Handlers.CompleteAttempt.PushBack(func(req *request.Request) {
787+
if req.Error != nil {
788+
recordDynamoError(tableName, req.Error, operation)
789+
}
790+
})
791+
}
792+
}
793+
843794
func recordDynamoError(tableName string, err error, operation string) {
844795
if awsErr, ok := err.(awserr.Error); ok {
845796
dynamoFailures.WithLabelValues(tableName, awsErr.Code(), operation).Add(float64(1))

pkg/chunk/aws/fixtures.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ var Fixtures = []testutils.Fixture{
4141
}
4242
index := &dynamoDBStorageClient{
4343
DynamoDB: dynamoDB,
44-
queryRequestFn: dynamoDB.queryRequest,
4544
batchGetItemRequestFn: dynamoDB.batchGetItemRequest,
4645
batchWriteItemRequestFn: dynamoDB.batchWriteItemRequest,
4746
schemaCfg: schemaConfig,
@@ -80,7 +79,6 @@ func dynamoDBFixture(provisionedErr, gangsize, maxParallelism int) testutils.Fix
8079
},
8180
DynamoDB: dynamoDB,
8281
writeThrottle: rate.NewLimiter(10, dynamoDBMaxWriteBatchSize),
83-
queryRequestFn: dynamoDB.queryRequest,
8482
batchGetItemRequestFn: dynamoDB.batchGetItemRequest,
8583
batchWriteItemRequestFn: dynamoDB.batchWriteItemRequest,
8684
schemaCfg: schemaCfg,

pkg/chunk/aws/mock.go

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ func (m *mockDynamoDBClient) batchGetItemRequest(_ context.Context, input *dynam
194194
}
195195
}
196196

197-
func (m *mockDynamoDBClient) queryRequest(_ context.Context, input *dynamodb.QueryInput) dynamoDBRequest {
197+
func (m *mockDynamoDBClient) QueryPagesWithContext(ctx aws.Context, input *dynamodb.QueryInput, fn func(*dynamodb.QueryOutput, bool) bool, opts ...request.Option) error {
198198
result := &dynamodb.QueryOutput{
199199
Items: []map[string]*dynamodb.AttributeValue{},
200200
}
@@ -241,20 +241,15 @@ func (m *mockDynamoDBClient) queryRequest(_ context.Context, input *dynamodb.Que
241241

242242
result.Items = append(result.Items, item)
243243
}
244-
245-
return &dynamoDBMockRequest{
246-
result: result,
247-
}
244+
fn(result, true)
245+
return nil
248246
}
249247

250248
type dynamoDBMockRequest struct {
251249
result interface{}
252250
err error
253251
}
254252

255-
func (m *dynamoDBMockRequest) NextPage() dynamoDBRequest {
256-
return m
257-
}
258253
func (m *dynamoDBMockRequest) Send() error {
259254
return m.err
260255
}
@@ -264,9 +259,6 @@ func (m *dynamoDBMockRequest) Data() interface{} {
264259
func (m *dynamoDBMockRequest) Error() error {
265260
return m.err
266261
}
267-
func (m *dynamoDBMockRequest) HasNextPage() bool {
268-
return false
269-
}
270262
func (m *dynamoDBMockRequest) Retryable() bool {
271263
return false
272264
}

pkg/chunk/aws/retryer.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package aws
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/aws/aws-sdk-go/aws/request"
8+
ot "github.com/opentracing/opentracing-go"
9+
otlog "github.com/opentracing/opentracing-go/log"
10+
11+
"github.com/cortexproject/cortex/pkg/util"
12+
)
13+
14+
// Map Cortex Backoff into AWS Retryer interface
15+
type retryer struct {
16+
*util.Backoff
17+
maxRetries int
18+
}
19+
20+
var _ request.Retryer = &retryer{}
21+
22+
func newRetryer(ctx context.Context, cfg util.BackoffConfig) *retryer {
23+
return &retryer{
24+
Backoff: util.NewBackoff(ctx, cfg),
25+
maxRetries: cfg.MaxRetries,
26+
}
27+
}
28+
29+
func (r *retryer) withRetries(req *request.Request) {
30+
req.Retryer = r
31+
}
32+
33+
// RetryRules return the retry delay that should be used by the SDK before
34+
// making another request attempt for the failed request.
35+
func (r *retryer) RetryRules(req *request.Request) time.Duration {
36+
duration := r.Backoff.NextDelay()
37+
if sp := ot.SpanFromContext(req.Context()); sp != nil {
38+
sp.LogFields(otlog.Int("retry", r.NumRetries()))
39+
}
40+
return duration
41+
}
42+
43+
// ShouldRetry returns if the failed request is retryable.
44+
func (r *retryer) ShouldRetry(req *request.Request) bool {
45+
return r.Ongoing() && (req.IsErrorRetryable() || req.IsErrorThrottle())
46+
}
47+
48+
// MaxRetries is the number of times a request may be retried before
49+
// failing.
50+
func (r *retryer) MaxRetries() int {
51+
return r.maxRetries
52+
}

pkg/util/backoff.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func (b *Backoff) NumRetries() int {
7474
// Returns immediately if Context is terminated
7575
func (b *Backoff) Wait() {
7676
// Increase the number of retries and get the next delay
77-
sleepTime := b.nextDelay()
77+
sleepTime := b.NextDelay()
7878

7979
if b.Ongoing() {
8080
select {
@@ -84,7 +84,7 @@ func (b *Backoff) Wait() {
8484
}
8585
}
8686

87-
func (b *Backoff) nextDelay() time.Duration {
87+
func (b *Backoff) NextDelay() time.Duration {
8888
b.numRetries++
8989

9090
// Handle the edge case the min and max have the same value

pkg/util/backoff_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"time"
77
)
88

9-
func TestBackoff_nextDelay(t *testing.T) {
9+
func TestBackoff_NextDelay(t *testing.T) {
1010
t.Parallel()
1111

1212
tests := map[string]struct {
@@ -92,7 +92,7 @@ func TestBackoff_nextDelay(t *testing.T) {
9292
})
9393

9494
for _, expectedRange := range testData.expectedRanges {
95-
delay := b.nextDelay()
95+
delay := b.NextDelay()
9696

9797
if delay < expectedRange[0] || delay > expectedRange[1] {
9898
t.Errorf("%d expected to be within %d and %d", delay, expectedRange[0], expectedRange[1])

0 commit comments

Comments
 (0)