Skip to content

Commit 0efd586

Browse files
committed
cloud: retry output if current state outputs are not available
1 parent 716fcce commit 0efd586

File tree

3 files changed

+228
-1
lines changed

3 files changed

+228
-1
lines changed

internal/cloud/retry.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright (c) HashiCorp, Inc.
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package cloud
5+
6+
import (
7+
"context"
8+
"log"
9+
"sync/atomic"
10+
"time"
11+
)
12+
13+
// Fatal marks an error value that RetryBackoff must not retry. When the
// retried func returns a value satisfying Fatal, the retry loop stops and
// the error reported by FatalError becomes the result handed back to the
// caller of RetryBackoff.
type Fatal interface {
	// FatalError reports the error that ended the retry loop.
	FatalError() error
}
19+
20+
// NonRetryableError is a simple implementation of Fatal that wraps an error.
// Returning it from a func passed to RetryBackoff stops further attempts.
type NonRetryableError struct {
	// InnerError is the error that caused retrying to be abandoned.
	InnerError error
}

// FatalError returns the inner error, but also implements Fatal, which
// signals to RetryBackoff that a non-retryable error occurred.
func (e NonRetryableError) FatalError() error {
	return e.InnerError
}

// Error returns the inner error string. A NonRetryableError without an inner
// error reports a fixed placeholder instead of dereferencing a nil error.
func (e NonRetryableError) Error() string {
	if e.InnerError == nil {
		return "non-retryable error"
	}
	return e.InnerError.Error()
}

// Unwrap exposes the inner error so that errors.Is and errors.As can match
// against it through the wrapper.
func (e NonRetryableError) Unwrap() error {
	return e.InnerError
}
35+
36+
// Bounds for the pause RetryBackoff inserts between attempts.
var (
	// initialBackoffDelay is the pause applied after the first failed attempt.
	initialBackoffDelay = 1 * time.Second

	// maxBackoffDelay is the ceiling the doubling pause is clamped to.
	maxBackoffDelay = 3 * time.Second
)
40+
41+
// RetryBackoff retries function f until nil or a FatalError is returned.
42+
// RetryBackoff only returns an error if the context is in error or if a
43+
// FatalError was encountered.
44+
func RetryBackoff(ctx context.Context, f func() error) error {
45+
// doneCh signals that the routine is done and sends the last error
46+
var doneCh = make(chan struct{})
47+
var errVal atomic.Value
48+
type errWrap struct {
49+
E error
50+
}
51+
52+
go func() {
53+
// the retry delay between each attempt
54+
var delay time.Duration = 0
55+
defer close(doneCh)
56+
57+
for {
58+
select {
59+
case <-ctx.Done():
60+
return
61+
case <-time.After(delay):
62+
}
63+
64+
err := f()
65+
switch e := err.(type) {
66+
case nil:
67+
return
68+
case Fatal:
69+
errVal.Store(errWrap{e.FatalError()})
70+
return
71+
}
72+
73+
delay *= 2
74+
if delay == 0 {
75+
delay = initialBackoffDelay
76+
}
77+
78+
delay = min(delay, maxBackoffDelay)
79+
80+
log.Printf("[WARN] retryable error: %q, delaying for %s", err, delay)
81+
}
82+
}()
83+
84+
// Wait until done or deadline
85+
select {
86+
case <-doneCh:
87+
case <-ctx.Done():
88+
}
89+
90+
err, hadErr := errVal.Load().(errWrap)
91+
var lastErr error
92+
if hadErr {
93+
lastErr = err.E
94+
}
95+
96+
if ctx.Err() != nil {
97+
return ctx.Err()
98+
}
99+
100+
return lastErr
101+
}

internal/cloud/retry_test.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright (c) HashiCorp, Inc.
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package cloud
5+
6+
import (
7+
"context"
8+
"errors"
9+
"testing"
10+
"time"
11+
)
12+
13+
// fe is the sentinel error reported by fatalError.
var fe = errors.New("this was a fatal error")

// fatalError is a stateless test stub whose FatalError method always
// reports fe.
type fatalError struct{}

// FatalError reports the shared sentinel fe.
func (fatalError) FatalError() error {
	return fe
}

// Error returns the message of the sentinel fe.
func (fatalError) Error() string {
	return fe.Error()
}
24+
25+
func Test_RetryBackoff_canceled(t *testing.T) {
26+
t.Parallel()
27+
ctx, cancel := context.WithCancel(context.Background())
28+
29+
cancel()
30+
31+
err := RetryBackoff(ctx, func() error {
32+
return nil
33+
})
34+
35+
if !errors.Is(err, context.Canceled) {
36+
t.Errorf("expected canceled error, got %q", err)
37+
}
38+
}
39+
40+
func Test_RetryBackoff_deadline(t *testing.T) {
41+
t.Parallel()
42+
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Millisecond))
43+
44+
defer cancel()
45+
46+
err := RetryBackoff(ctx, func() error {
47+
time.Sleep(10 * time.Millisecond)
48+
return nil
49+
})
50+
51+
if !errors.Is(err, context.DeadlineExceeded) {
52+
t.Errorf("expected timeout error, got %q", err)
53+
}
54+
}
55+
56+
func Test_RetryBackoff_happy(t *testing.T) {
57+
t.Parallel()
58+
59+
err := RetryBackoff(context.Background(), func() error {
60+
return nil
61+
})
62+
63+
if err != nil {
64+
t.Errorf("expected nil err, got %q", err)
65+
}
66+
}
67+
68+
func Test_RetryBackoff_fatal(t *testing.T) {
69+
t.Parallel()
70+
71+
err := RetryBackoff(context.Background(), func() error {
72+
return fatalError{}
73+
})
74+
75+
if !errors.Is(fe, err) {
76+
t.Errorf("expected fatal error, got %q", err)
77+
}
78+
}
79+
80+
func Test_RetryBackoff_non_fatal(t *testing.T) {
81+
t.Parallel()
82+
83+
var retriedCount = 0
84+
85+
err := RetryBackoff(context.Background(), func() error {
86+
retriedCount += 1
87+
if retriedCount == 2 {
88+
return nil
89+
}
90+
return errors.New("retryable error")
91+
})
92+
93+
if err != nil {
94+
t.Errorf("expected no error, got %q", err)
95+
}
96+
97+
if retriedCount != 2 {
98+
t.Errorf("expected 2 retries, got %d", retriedCount)
99+
}
100+
}

internal/cloud/state.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,10 +516,36 @@ func (s *State) Delete(force bool) error {
516516

517517
// GetRootOutputValues fetches output values from HCP Terraform
518518
func (s *State) GetRootOutputValues(ctx context.Context) (map[string]*states.OutputValue, error) {
519+
// The cloud backend initializes this value to true, but we want to implement
520+
// some custom retry logic. This code presumes that the tfeClient doesn't need
521+
// to be shared with other goroutines by the caller.
522+
s.tfeClient.RetryServerErrors(false)
523+
defer s.tfeClient.RetryServerErrors(true)
519524

520-
so, err := s.tfeClient.StateVersionOutputs.ReadCurrent(ctx, s.workspace.ID)
525+
ctx, cancel := context.WithTimeout(ctx, time.Minute)
526+
defer cancel()
527+
528+
var so *tfe.StateVersionOutputsList
529+
err := RetryBackoff(ctx, func() error {
530+
var err error
531+
so, err = s.tfeClient.StateVersionOutputs.ReadCurrent(ctx, s.workspace.ID)
532+
533+
if err != nil {
534+
if strings.Contains(err.Error(), "service unavailable") {
535+
return err
536+
}
537+
return NonRetryableError{err}
538+
}
539+
return nil
540+
})
521541

522542
if err != nil {
543+
switch err {
544+
case context.DeadlineExceeded:
545+
return nil, fmt.Errorf("current outputs were not ready to be read within the deadline. Please try again")
546+
case context.Canceled:
547+
return nil, fmt.Errorf("canceled reading current outputs")
548+
}
523549
return nil, fmt.Errorf("could not read state version outputs: %w", err)
524550
}
525551

0 commit comments

Comments
 (0)