Skip to content

Commit f61f18d

Browse files
committed
crypto/tls: make Conn.Read return (n, io.EOF) when EOF is next in buffer
Update #3514 An io.Reader is permitted to return either (n, nil) or (n, io.EOF) on EOF or other error. The tls package previously always returned (n, nil) for a read of size n if n bytes were available, not surfacing errors at the same time. Amazon's HTTPS frontends like to hang up on clients without sending the appropriate HTTP headers. (In their defense, they're allowed to hang up any time, but generally a server hangs up after a bit of inactivity, not immediately.) In any case, the Go HTTP client tries to re-use connections by looking at whether the response headers say to keep the connection open, and because the connection looks okay, under heavy load it's possible we'll reuse it immediately, writing the next request, just as the Transport's always-reading goroutine returns from tls.Conn.Read and sees (0, io.EOF). But because Amazon does send an AlertCloseNotify record before it hangs up on us, and the tls package does its own internal buffering (up to 1024 bytes) of pending data, we have the AlertCloseNotify in an unread buffer when our Conn.Read (to the HTTP Transport code) reads its final bit of data in the HTTP response body. This change makes that final Read return (n, io.EOF) when an AlertCloseNotify record is buffered right after, if we'd otherwise return (n, nil). A dependent change in the HTTP code then notes whether a client connection has seen an io.EOF and uses that as an additional signal to not reuse a HTTPS connection. With both changes, the majority of Amazon request failures go away. Without either one, 10-20 goroutines hitting the S3 API leads to such an error rate that empirically up to 5 retries are needed to complete an API call. LGTM=agl, rsc R=agl, rsc CC=golang-codereviews https://golang.org/cl/76400046
1 parent 8de04c7 commit f61f18d

File tree

2 files changed

+90
-7
lines changed

2 files changed

+90
-7
lines changed

src/pkg/crypto/tls/conn.go

+21
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,8 @@ func (b *block) readFromUntil(r io.Reader, n int) error {
451451
m, err := r.Read(b.data[len(b.data):cap(b.data)])
452452
b.data = b.data[0 : len(b.data)+m]
453453
if len(b.data) >= n {
454+
// TODO(bradfitz,agl): slightly suspicious
455+
// that we're throwing away r.Read's err here.
454456
break
455457
}
456458
if err != nil {
@@ -906,6 +908,25 @@ func (c *Conn) Read(b []byte) (n int, err error) {
906908
c.input = nil
907909
}
908910

911+
// If a close-notify alert is waiting, read it so that
912+
// we can return (n, EOF) instead of (n, nil), to signal
913+
// to the HTTP response reading goroutine that the
914+
// connection is now closed. This eliminates a race
915+
// where the HTTP response reading goroutine would
916+
// otherwise not observe the EOF until its next read,
917+
// by which time a client goroutine might have already
918+
// tried to reuse the HTTP connection for a new
919+
// request.
920+
// See https://codereview.appspot.com/76400046
921+
// and http://golang.org/issue/3514
922+
if ri := c.rawInput; ri != nil &&
923+
n != 0 && err == nil &&
924+
c.input == nil && len(ri.data) > 0 && recordType(ri.data[0]) == recordTypeAlert {
925+
if recErr := c.readRecord(recordTypeApplicationData); recErr != nil {
926+
err = recErr // will be io.EOF on closeNotify
927+
}
928+
}
929+
909930
if n != 0 || err != nil {
910931
return n, err
911932
}

src/pkg/crypto/tls/tls_test.go

+69-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package tls
66

77
import (
8+
"io"
89
"net"
910
"strings"
1011
"testing"
@@ -109,18 +110,22 @@ func TestX509MixedKeyPair(t *testing.T) {
109110
}
110111
}
111112

112-
func TestDialTimeout(t *testing.T) {
113-
if testing.Short() {
114-
t.Skip("skipping in short mode")
115-
}
116-
117-
listener, err := net.Listen("tcp", "127.0.0.1:0")
113+
func newLocalListener(t *testing.T) net.Listener {
114+
ln, err := net.Listen("tcp", "127.0.0.1:0")
118115
if err != nil {
119-
listener, err = net.Listen("tcp6", "[::1]:0")
116+
ln, err = net.Listen("tcp6", "[::1]:0")
120117
}
121118
if err != nil {
122119
t.Fatal(err)
123120
}
121+
return ln
122+
}
123+
124+
func TestDialTimeout(t *testing.T) {
125+
if testing.Short() {
126+
t.Skip("skipping in short mode")
127+
}
128+
listener := newLocalListener(t)
124129

125130
addr := listener.Addr().String()
126131
defer listener.Close()
@@ -142,6 +147,7 @@ func TestDialTimeout(t *testing.T) {
142147
Timeout: 10 * time.Millisecond,
143148
}
144149

150+
var err error
145151
if _, err = DialWithDialer(dialer, "tcp", addr, nil); err == nil {
146152
t.Fatal("DialWithTimeout completed successfully")
147153
}
@@ -150,3 +156,59 @@ func TestDialTimeout(t *testing.T) {
150156
t.Errorf("resulting error not a timeout: %s", err)
151157
}
152158
}
159+
160+
// tests that Conn.Read returns (non-zero, io.EOF) instead of
161+
// (non-zero, nil) when a Close (alertCloseNotify) is sitting right
162+
// behind the application data in the buffer.
163+
func TestConnReadNonzeroAndEOF(t *testing.T) {
164+
ln := newLocalListener(t)
165+
defer ln.Close()
166+
167+
srvCh := make(chan *Conn, 1)
168+
go func() {
169+
sconn, err := ln.Accept()
170+
if err != nil {
171+
t.Error(err)
172+
srvCh <- nil
173+
return
174+
}
175+
serverConfig := *testConfig
176+
srv := Server(sconn, &serverConfig)
177+
if err := srv.Handshake(); err != nil {
178+
t.Error("handshake: %v", err)
179+
srvCh <- nil
180+
return
181+
}
182+
srvCh <- srv
183+
}()
184+
185+
clientConfig := *testConfig
186+
conn, err := Dial("tcp", ln.Addr().String(), &clientConfig)
187+
if err != nil {
188+
t.Fatal(err)
189+
}
190+
defer conn.Close()
191+
192+
srv := <-srvCh
193+
if srv == nil {
194+
return
195+
}
196+
197+
buf := make([]byte, 6)
198+
199+
srv.Write([]byte("foobar"))
200+
n, err := conn.Read(buf)
201+
if n != 6 || err != nil || string(buf) != "foobar" {
202+
t.Fatalf("Read = %d, %v, data %q; want 6, nil, foobar", n, err, buf)
203+
}
204+
205+
srv.Write([]byte("abcdef"))
206+
srv.Close()
207+
n, err = conn.Read(buf)
208+
if n != 6 || string(buf) != "abcdef" {
209+
t.Fatalf("Read = %d, buf= %q; want 6, abcdef", n, buf)
210+
}
211+
if err != io.EOF {
212+
t.Errorf("Second Read error = %v; want io.EOF", err)
213+
}
214+
}

0 commit comments

Comments
 (0)