Skip to content

Commit bb17734

Browse files
authored
Merge pull request #22037 from hashicorp/jbardin/ssh-alive
monitor ssh connection live-ness
2 parents 0111498 + 2206512 commit bb17734

22 files changed

Lines changed: 1954 additions & 46 deletions

communicator/ssh/communicator.go

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,23 @@ import (
2727
const (
2828
// DefaultShebang is added at the top of a SSH script file
2929
DefaultShebang = "#!/bin/sh\n"
30+
)
31+
32+
var (
33+
// randShared is a global random generator object that is shared. This must be
34+
// shared since it is seeded by the current time and creating multiple can
35+
// result in the same values. By using a shared RNG we assure different numbers
36+
// per call.
37+
randLock sync.Mutex
38+
randShared *rand.Rand
3039

3140
// enable ssh keeplive probes by default
3241
keepAliveInterval = 2 * time.Second
33-
)
3442

35-
// randShared is a global random generator object that is shared.
36-
// This must be shared since it is seeded by the current time and
37-
// creating multiple can result in the same values. By using a shared
38-
// RNG we assure different numbers per call.
39-
var randLock sync.Mutex
40-
var randShared *rand.Rand
43+
// max time to wait for for a KeepAlive response before considering the
44+
// connection to be dead.
45+
maxKeepAliveDelay = 120 * time.Second
46+
)
4147

4248
// Communicator represents the SSH communicator
4349
type Communicator struct {
@@ -225,20 +231,50 @@ func (c *Communicator) Connect(o terraform.UIOutput) (err error) {
225231
// long-running commands.
226232
log.Printf("[DEBUG] starting ssh KeepAlives")
227233
go func() {
228-
t := time.NewTicker(keepAliveInterval)
229-
defer t.Stop()
234+
defer cancelKeepAlive()
235+
// Along with the KeepAlives generating packets to keep the tcp
236+
// connection open, we will use the replies to verify liveness of the
237+
// connection. This will prevent dead connections from blocking the
238+
// provisioner indefinitely.
239+
respCh := make(chan error, 1)
240+
241+
go func() {
242+
t := time.NewTicker(keepAliveInterval)
243+
defer t.Stop()
244+
for {
245+
select {
246+
case <-t.C:
247+
_, _, err := c.client.SendRequest("keepalive@terraform.io", true, nil)
248+
respCh <- err
249+
case <-ctx.Done():
250+
return
251+
}
252+
}
253+
}()
254+
255+
after := time.NewTimer(maxKeepAliveDelay)
256+
defer after.Stop()
257+
230258
for {
231259
select {
232-
case <-t.C:
233-
// there's no useful response to these, just abort when there's
234-
// an error.
235-
_, _, err := c.client.SendRequest("keepalive@terraform.io", true, nil)
260+
case err := <-respCh:
236261
if err != nil {
262+
log.Printf("[ERROR] ssh keepalive: %s", err)
263+
sshConn.Close()
237264
return
238265
}
266+
case <-after.C:
267+
// abort after too many missed keepalives
268+
log.Println("[ERROR] no reply from ssh server")
269+
sshConn.Close()
270+
return
239271
case <-ctx.Done():
240272
return
241273
}
274+
if !after.Stop() {
275+
<-after.C
276+
}
277+
after.Reset(maxKeepAliveDelay)
242278
}
243279
}()
244280

communicator/ssh/communicator_test.go

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,19 @@ func newMockLineServer(t *testing.T, signer ssh.Signer, pubKey string) string {
100100

101101
go func(in <-chan *ssh.Request) {
102102
for req := range in {
103+
// since this channel's requests are serviced serially,
104+
// this will block keepalive probes, and can simulate a
105+
// hung connection.
106+
if bytes.Contains(req.Payload, []byte("sleep")) {
107+
time.Sleep(time.Second)
108+
}
109+
103110
if req.WantReply {
104111
req.Reply(true, nil)
105112
}
106113
}
107114
}(requests)
108115

109-
go func(newChannel ssh.NewChannel) {
110-
conn.OpenChannel(newChannel.ChannelType(), nil)
111-
}(newChannel)
112-
113116
defer channel.Close()
114117
}
115118
conn.Close()
@@ -182,6 +185,10 @@ func TestStart(t *testing.T) {
182185
// TestKeepAlives verifies that the keepalive messages don't interfere with
183186
// normal operation of the client.
184187
func TestKeepAlives(t *testing.T) {
188+
ivl := keepAliveInterval
189+
keepAliveInterval = 250 * time.Millisecond
190+
defer func() { keepAliveInterval = ivl }()
191+
185192
address := newMockLineServer(t, nil, testClientPublicKey)
186193
parts := strings.Split(address, ":")
187194

@@ -193,7 +200,6 @@ func TestKeepAlives(t *testing.T) {
193200
"password": "pass",
194201
"host": parts[0],
195202
"port": parts[1],
196-
"timeout": "30s",
197203
},
198204
},
199205
}
@@ -209,18 +215,64 @@ func TestKeepAlives(t *testing.T) {
209215

210216
var cmd remote.Cmd
211217
stdout := new(bytes.Buffer)
212-
cmd.Command = "echo foo"
218+
cmd.Command = "sleep"
213219
cmd.Stdout = stdout
214220

215221
// wait a bit before executing the command, so that at least 1 keepalive is sent
216-
time.Sleep(3 * time.Second)
222+
time.Sleep(500 * time.Millisecond)
217223

218224
err = c.Start(&cmd)
219225
if err != nil {
220226
t.Fatalf("error executing remote command: %s", err)
221227
}
222228
}
223229

230+
// TestDeadConnection verifies that failed keepalive messages will eventually
231+
// kill the connection.
232+
func TestFailedKeepAlives(t *testing.T) {
233+
ivl := keepAliveInterval
234+
del := maxKeepAliveDelay
235+
maxKeepAliveDelay = 500 * time.Millisecond
236+
keepAliveInterval = 250 * time.Millisecond
237+
defer func() {
238+
keepAliveInterval = ivl
239+
maxKeepAliveDelay = del
240+
}()
241+
242+
address := newMockLineServer(t, nil, testClientPublicKey)
243+
parts := strings.Split(address, ":")
244+
245+
r := &terraform.InstanceState{
246+
Ephemeral: terraform.EphemeralState{
247+
ConnInfo: map[string]string{
248+
"type": "ssh",
249+
"user": "user",
250+
"password": "pass",
251+
"host": parts[0],
252+
"port": parts[1],
253+
},
254+
},
255+
}
256+
257+
c, err := New(r)
258+
if err != nil {
259+
t.Fatalf("error creating communicator: %s", err)
260+
}
261+
262+
if err := c.Connect(nil); err != nil {
263+
t.Fatal(err)
264+
}
265+
var cmd remote.Cmd
266+
stdout := new(bytes.Buffer)
267+
cmd.Command = "sleep"
268+
cmd.Stdout = stdout
269+
270+
err = c.Start(&cmd)
271+
if err == nil {
272+
t.Fatal("expected connection error")
273+
}
274+
}
275+
224276
func TestLostConnection(t *testing.T) {
225277
address := newMockLineServer(t, nil, testClientPublicKey)
226278
parts := strings.Split(address, ":")

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ require (
120120
go.uber.org/atomic v1.3.2 // indirect
121121
go.uber.org/multierr v1.1.0 // indirect
122122
go.uber.org/zap v1.9.1 // indirect
123-
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734
123+
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4
124124
golang.org/x/net v0.0.0-20190502183928-7f726cade0ab
125125
golang.org/x/oauth2 v0.0.0-20190220154721-9b3c75971fc9
126126
google.golang.org/api v0.1.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90Pveol
451451
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
452452
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734 h1:p/H982KKEjUnLJkM3tt/LemDnOc1GiZL5FCVlORJ5zo=
453453
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
454+
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc=
455+
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
454456
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
455457
golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
456458
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=

vendor/golang.org/x/crypto/ed25519/ed25519.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/golang.org/x/crypto/ed25519/ed25519_go113.go

Lines changed: 73 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)