@@ -724,7 +724,7 @@ void mptcp_data_acked(struct sock *sk)
724
724
{
725
725
mptcp_reset_timer (sk );
726
726
727
- if ((! test_bit (MPTCP_SEND_SPACE , & mptcp_sk (sk )-> flags ) ||
727
+ if ((test_bit (MPTCP_NOSPACE , & mptcp_sk (sk )-> flags ) ||
728
728
mptcp_send_head (sk ) ||
729
729
(inet_sk_state_load (sk ) != TCP_ESTABLISHED )))
730
730
mptcp_schedule_work (sk );
@@ -835,20 +835,6 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
835
835
put_page (dfrag -> page );
836
836
}
837
837
838
- static bool mptcp_is_writeable (struct mptcp_sock * msk )
839
- {
840
- struct mptcp_subflow_context * subflow ;
841
-
842
- if (!sk_stream_is_writeable ((struct sock * )msk ))
843
- return false;
844
-
845
- mptcp_for_each_subflow (msk , subflow ) {
846
- if (sk_stream_is_writeable (subflow -> tcp_sock ))
847
- return true;
848
- }
849
- return false;
850
- }
851
-
852
838
static void mptcp_clean_una (struct sock * sk )
853
839
{
854
840
struct mptcp_sock * msk = mptcp_sk (sk );
@@ -901,13 +887,8 @@ static void mptcp_clean_una_wakeup(struct sock *sk)
901
887
mptcp_clean_una (sk );
902
888
903
889
/* Only wake up writers if the MPTCP socket itself is writeable */
904
- if (mptcp_is_writeable (msk )) {
905
- set_bit (MPTCP_SEND_SPACE , & msk -> flags );
906
- smp_mb__after_atomic ();
907
-
908
- /* set SEND_SPACE before sk_stream_write_space clears
909
- * NOSPACE
910
- */
890
+ if (sk_stream_is_writeable (sk )) {
891
+ clear_bit (MPTCP_NOSPACE , & msk -> flags );
911
892
sk_stream_write_space (sk );
912
893
}
913
894
}
@@ -1041,17 +1022,25 @@ static void mptcp_nospace(struct mptcp_sock *msk)
1041
1022
{
1042
1023
struct mptcp_subflow_context * subflow ;
1043
1024
1044
- clear_bit ( MPTCP_SEND_SPACE , & msk -> flags );
1025
+ set_bit ( MPTCP_NOSPACE , & msk -> flags );
1045
1026
smp_mb__after_atomic (); /* msk->flags is changed by write_space cb */
1046
1027
1047
1028
mptcp_for_each_subflow (msk , subflow ) {
1048
1029
struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
1030
+ bool ssk_writeable = sk_stream_is_writeable (ssk );
1049
1031
struct socket * sock = READ_ONCE (ssk -> sk_socket );
1050
1032
1033
+ if (ssk_writeable || !sock )
1034
+ continue ;
1035
+
1051
1036
/* enables ssk->write_space() callbacks */
1052
- if (sock )
1053
- set_bit (SOCK_NOSPACE , & sock -> flags );
1037
+ set_bit (SOCK_NOSPACE , & sock -> flags );
1054
1038
}
1039
+
1040
+ /* mptcp_data_acked() could run just before we set the NOSPACE bit,
1041
+ * so explicitly check for snd_una value
1042
+ */
1043
+ mptcp_clean_una ((struct sock * )msk );
1055
1044
}
1056
1045
1057
1046
static bool mptcp_subflow_active (struct mptcp_subflow_context * subflow )
@@ -1155,12 +1144,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
1155
1144
return NULL ;
1156
1145
}
1157
1146
1158
- static void ssk_check_wmem (struct mptcp_sock * msk )
1159
- {
1160
- if (unlikely (!mptcp_is_writeable (msk )))
1161
- mptcp_nospace (msk );
1162
- }
1163
-
1164
1147
static void mptcp_push_release (struct sock * sk , struct sock * ssk ,
1165
1148
struct mptcp_sendmsg_info * info )
1166
1149
{
@@ -1332,7 +1315,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1332
1315
1333
1316
wait_for_memory :
1334
1317
mptcp_nospace (msk );
1335
- mptcp_clean_una (sk );
1336
1318
if (mptcp_timer_pending (sk ))
1337
1319
mptcp_reset_timer (sk );
1338
1320
ret = sk_stream_wait_memory (sk , & timeo );
@@ -1344,7 +1326,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1344
1326
mptcp_push_pending (sk , msg -> msg_flags );
1345
1327
1346
1328
out :
1347
- ssk_check_wmem (msk );
1348
1329
release_sock (sk );
1349
1330
return copied ? : ret ;
1350
1331
}
@@ -1921,7 +1902,6 @@ static int __mptcp_init_sock(struct sock *sk)
1921
1902
INIT_LIST_HEAD (& msk -> conn_list );
1922
1903
INIT_LIST_HEAD (& msk -> join_list );
1923
1904
INIT_LIST_HEAD (& msk -> rtx_queue );
1924
- __set_bit (MPTCP_SEND_SPACE , & msk -> flags );
1925
1905
INIT_WORK (& msk -> work , mptcp_worker );
1926
1906
msk -> out_of_order_queue = RB_ROOT ;
1927
1907
msk -> first_pending = NULL ;
@@ -2619,13 +2599,6 @@ bool mptcp_finish_join(struct sock *ssk)
2619
2599
return true;
2620
2600
}
2621
2601
2622
- static bool mptcp_memory_free (const struct sock * sk , int wake )
2623
- {
2624
- struct mptcp_sock * msk = mptcp_sk (sk );
2625
-
2626
- return wake ? test_bit (MPTCP_SEND_SPACE , & msk -> flags ) : true;
2627
- }
2628
-
2629
2602
static struct proto mptcp_prot = {
2630
2603
.name = "MPTCP" ,
2631
2604
.owner = THIS_MODULE ,
@@ -2646,7 +2619,6 @@ static struct proto mptcp_prot = {
2646
2619
.sockets_allocated = & mptcp_sockets_allocated ,
2647
2620
.memory_allocated = & tcp_memory_allocated ,
2648
2621
.memory_pressure = & tcp_memory_pressure ,
2649
- .stream_memory_free = mptcp_memory_free ,
2650
2622
.sysctl_wmem_offset = offsetof(struct net , ipv4 .sysctl_tcp_wmem ),
2651
2623
.sysctl_rmem_offset = offsetof(struct net , ipv4 .sysctl_tcp_rmem ),
2652
2624
.sysctl_mem = sysctl_tcp_mem ,
@@ -2820,6 +2792,39 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
2820
2792
0 ;
2821
2793
}
2822
2794
2795
+ static bool __mptcp_check_writeable (struct mptcp_sock * msk )
2796
+ {
2797
+ struct sock * sk = (struct sock * )msk ;
2798
+ bool mptcp_writable ;
2799
+
2800
+ mptcp_clean_una (sk );
2801
+ mptcp_writable = sk_stream_is_writeable (sk );
2802
+ if (!mptcp_writable )
2803
+ mptcp_nospace (msk );
2804
+
2805
+ return mptcp_writable ;
2806
+ }
2807
+
2808
+ static __poll_t mptcp_check_writeable (struct mptcp_sock * msk )
2809
+ {
2810
+ struct sock * sk = (struct sock * )msk ;
2811
+ __poll_t ret = 0 ;
2812
+ bool slow ;
2813
+
2814
+ if (unlikely (sk -> sk_shutdown & SEND_SHUTDOWN ))
2815
+ return 0 ;
2816
+
2817
+ if (sk_stream_is_writeable (sk ))
2818
+ return EPOLLOUT | EPOLLWRNORM ;
2819
+
2820
+ slow = lock_sock_fast (sk );
2821
+ if (__mptcp_check_writeable (msk ))
2822
+ ret = EPOLLOUT | EPOLLWRNORM ;
2823
+
2824
+ unlock_sock_fast (sk , slow );
2825
+ return ret ;
2826
+ }
2827
+
2823
2828
static __poll_t mptcp_poll (struct file * file , struct socket * sock ,
2824
2829
struct poll_table_struct * wait )
2825
2830
{
@@ -2838,8 +2843,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
2838
2843
2839
2844
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV ) {
2840
2845
mask |= mptcp_check_readable (msk );
2841
- if (test_bit (MPTCP_SEND_SPACE , & msk -> flags ))
2842
- mask |= EPOLLOUT | EPOLLWRNORM ;
2846
+ mask |= mptcp_check_writeable (msk );
2843
2847
}
2844
2848
if (sk -> sk_shutdown & RCV_SHUTDOWN )
2845
2849
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
0 commit comments