Skip to content

Commit 19f6d3f

Browse files
tracywwnj authored and davem330 committed
net/tcp-fastopen: Add new API support
This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an alternative way to perform Fast Open on the active side (client).

Prior to this patch, a client needs to replace the connect() call with sendto(MSG_FASTOPEN). This can be cumbersome for applications that want to use Fast Open: these socket operations are often done in lower-layer libraries used by many other applications. Changing these libraries and/or the socket call sequences is not trivial. A more convenient approach is to perform Fast Open by simply enabling a socket option when the socket is created, without changing the sequence of the other socket calls:

  s = socket()
    Create a new socket.
  setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …);
    Newly introduced sockopt. If set, the new functionality described below will be used. Return EOPNOTSUPP if TFO is not supported or not enabled in the kernel.
  connect()
    With cookie present, return 0 immediately. With no cookie, initiate 3WHS with TFO cookie-request option and return -1 with errno = EINPROGRESS.
  write()/sendmsg()
    With cookie present, send out SYN with data and return the number of bytes buffered. With no cookie, and 3WHS not yet completed, return -1 with errno = EINPROGRESS. No MSG_FASTOPEN flag is needed.
  read()
    Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but write() is not called yet. Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is established but no msg is received yet. Return number of bytes read if socket is established and there is msg received.

The new API simplifies life for applications that always perform a write() immediately after a successful connect(). Such applications can now take advantage of Fast Open by merely making one new setsockopt() call at the time of creating the socket. Nothing else about the application's socket call sequence needs to change.

Signed-off-by: Wei Wang <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Acked-by: Yuchung Cheng <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 25776aa commit 19f6d3f

File tree

9 files changed

+111
-11
lines changed

9 files changed

+111
-11
lines changed

include/linux/tcp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ struct tcp_sock {
222222
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
223223
u8 chrono_type:2, /* current chronograph type */
224224
rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
225-
unused:5;
225+
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
226+
unused:4;
226227
u8 nonagle : 4,/* Disable Nagle algorithm? */
227228
thin_lto : 1,/* Use linear timeouts for thin streams */
228229
unused1 : 1,

include/net/inet_sock.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,11 @@ struct inet_sock {
206206
transparent:1,
207207
mc_all:1,
208208
nodefrag:1;
209-
__u8 bind_address_no_port:1;
209+
__u8 bind_address_no_port:1,
210+
defer_connect:1; /* Indicates that fastopen_connect is set
211+
* and cookie exists so we defer connect
212+
* until first data frame is written
213+
*/
210214
__u8 rcv_tos;
211215
__u8 convert_csum;
212216
int uc_index;

include/net/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,6 +1495,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
14951495
void tcp_fastopen_init_key_once(bool publish);
14961496
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
14971497
struct tcp_fastopen_cookie *cookie);
1498+
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
14981499
#define TCP_FASTOPEN_KEY_LENGTH 16
14991500

15001501
/* Fastopen key context */

include/uapi/linux/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ enum {
116116
#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
117117
#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
118118
#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */
119+
#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */
119120

120121
struct tcp_repair_opt {
121122
__u32 opt_code;

net/ipv4/af_inet.c

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
576576
int err;
577577
long timeo;
578578

579-
if (addr_len < sizeof(uaddr->sa_family))
580-
return -EINVAL;
579+
/*
580+
* uaddr can be NULL and addr_len can be 0 if:
581+
* sk is a TCP fastopen active socket and
582+
* TCP_FASTOPEN_CONNECT sockopt is set and
583+
* we already have a valid cookie for this socket.
584+
* In this case, user can call write() after connect().
585+
* write() will invoke tcp_sendmsg_fastopen() which calls
586+
* __inet_stream_connect().
587+
*/
588+
if (uaddr) {
589+
if (addr_len < sizeof(uaddr->sa_family))
590+
return -EINVAL;
581591

582-
if (uaddr->sa_family == AF_UNSPEC) {
583-
err = sk->sk_prot->disconnect(sk, flags);
584-
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
585-
goto out;
592+
if (uaddr->sa_family == AF_UNSPEC) {
593+
err = sk->sk_prot->disconnect(sk, flags);
594+
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
595+
goto out;
596+
}
586597
}
587598

588599
switch (sock->state) {
@@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
593604
err = -EISCONN;
594605
goto out;
595606
case SS_CONNECTING:
596-
err = -EALREADY;
607+
if (inet_sk(sk)->defer_connect)
608+
err = -EINPROGRESS;
609+
else
610+
err = -EALREADY;
597611
/* Fall out of switch with err, set for this state */
598612
break;
599613
case SS_UNCONNECTED:
@@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
607621

608622
sock->state = SS_CONNECTING;
609623

624+
if (!err && inet_sk(sk)->defer_connect)
625+
goto out;
626+
610627
/* Just entered SS_CONNECTING state; the only
611628
* difference is that return value in non-blocking
612629
* case is EINPROGRESS, rather than EALREADY.

net/ipv4/tcp.c

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
533533

534534
if (tp->urg_data & TCP_URG_VALID)
535535
mask |= POLLPRI;
536+
} else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
537+
/* Active TCP fastopen socket with defer_connect
538+
* Return POLLOUT so application can call write()
539+
* in order for kernel to generate SYN+data
540+
*/
541+
mask |= POLLOUT | POLLWRNORM;
536542
}
537543
/* This barrier is coupled with smp_wmb() in tcp_reset() */
538544
smp_rmb();
@@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
10711077
int *copied, size_t size)
10721078
{
10731079
struct tcp_sock *tp = tcp_sk(sk);
1080+
struct inet_sock *inet = inet_sk(sk);
10741081
int err, flags;
10751082

10761083
if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
@@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
10851092
tp->fastopen_req->data = msg;
10861093
tp->fastopen_req->size = size;
10871094

1095+
if (inet->defer_connect) {
1096+
err = tcp_connect(sk);
1097+
/* Same failure procedure as in tcp_v4/6_connect */
1098+
if (err) {
1099+
tcp_set_state(sk, TCP_CLOSE);
1100+
inet->inet_dport = 0;
1101+
sk->sk_route_caps = 0;
1102+
}
1103+
}
10881104
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
10891105
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
10901106
msg->msg_namelen, flags);
1107+
inet->defer_connect = 0;
10911108
*copied = tp->fastopen_req->copied;
10921109
tcp_free_fastopen_req(tp);
10931110
return err;
@@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
11071124
lock_sock(sk);
11081125

11091126
flags = msg->msg_flags;
1110-
if (flags & MSG_FASTOPEN) {
1127+
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
11111128
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
11121129
if (err == -EINPROGRESS && copied_syn > 0)
11131130
goto out;
@@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
26562673
err = -EINVAL;
26572674
}
26582675
break;
2676+
case TCP_FASTOPEN_CONNECT:
2677+
if (val > 1 || val < 0) {
2678+
err = -EINVAL;
2679+
} else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
2680+
if (sk->sk_state == TCP_CLOSE)
2681+
tp->fastopen_connect = val;
2682+
else
2683+
err = -EINVAL;
2684+
} else {
2685+
err = -EOPNOTSUPP;
2686+
}
2687+
break;
26592688
case TCP_TIMESTAMP:
26602689
if (!tp->repair)
26612690
err = -EPERM;
@@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
30163045
val = icsk->icsk_accept_queue.fastopenq.max_qlen;
30173046
break;
30183047

3048+
case TCP_FASTOPEN_CONNECT:
3049+
val = tp->fastopen_connect;
3050+
break;
3051+
30193052
case TCP_TIMESTAMP:
30203053
val = tcp_time_stamp + tp->tsoffset;
30213054
break;

net/ipv4/tcp_fastopen.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,36 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
346346
}
347347
return cookie->len > 0;
348348
}
349+
350+
/* Decide whether to defer sending the SYN until the first write().
351+
* We defer only when both hold: (1) the fastopen_connect sockopt is set and
352+
* no fastopen_req is attached yet, (2) tcp_fastopen_cookie_check() succeeds.
353+
* Returns true when deferring; inet_sk(sk)->defer_connect is then set to 1
354+
* and *err is left untouched. Returns false when the SYN should go out
355+
* immediately; only on that path may *err be set (-ENOBUFS on alloc failure).
356+
*/
357+
bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
358+
{
359+
struct tcp_fastopen_cookie cookie = { .len = 0 };
360+
struct tcp_sock *tp = tcp_sk(sk);
361+
u16 mss;
362+
363+
if (tp->fastopen_connect && !tp->fastopen_req) {
364+
if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
365+
inet_sk(sk)->defer_connect = 1;
366+
return true;
367+
}
368+
369+
/* Cookie check failed: alloc fastopen_req so the FO option is
370+
* included in the SYN; the caller sees -ENOBUFS via *err on failure
371+
*/
372+
tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
373+
sk->sk_allocation);
374+
if (tp->fastopen_req)
375+
tp->fastopen_req->cookie = cookie;
376+
else
377+
*err = -ENOBUFS;
378+
}
379+
return false;
380+
}
381+
EXPORT_SYMBOL(tcp_fastopen_defer_connect);

net/ipv4/tcp_ipv4.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
232232
/* OK, now commit destination to socket. */
233233
sk->sk_gso_type = SKB_GSO_TCPV4;
234234
sk_setup_caps(sk, &rt->dst);
235+
rt = NULL;
235236

236237
if (!tp->write_seq && likely(!tp->repair))
237238
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
242243

243244
inet->inet_id = tp->write_seq ^ jiffies;
244245

246+
if (tcp_fastopen_defer_connect(sk, &err))
247+
return err;
248+
if (err)
249+
goto failure;
250+
245251
err = tcp_connect(sk);
246252

247-
rt = NULL;
248253
if (err)
249254
goto failure;
250255

net/ipv6/tcp_ipv6.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
287287
inet->inet_dport,
288288
&tp->tsoffset);
289289

290+
if (tcp_fastopen_defer_connect(sk, &err))
291+
return err;
292+
if (err)
293+
goto late_failure;
294+
290295
err = tcp_connect(sk);
291296
if (err)
292297
goto late_failure;

0 commit comments

Comments
 (0)