Skip to content

Commit 417fa6d

Browse files
author
Martin KaFai Lau
committed
Merge branch 'fix sockmap + stream af_unix memleak'
John Fastabend says: ==================== There was a memleak when streaming af_unix sockets were inserted into multiple sockmap slots and/or maps. This is because each insert would call a proto update operatino and these must be allowed to be called multiple times. The streaming af_unix implementation recently added a refcnt to handle a use after free issue, however it introduced a memleak when inserted into multiple maps. This series fixes the memleak, adds a note in the code so we remember that proto updates need to support this. And then we add three tests for each of the slightly different iterations of adding sockets into multiple maps. I kept them as 3 independent test cases here. I have some slight preference for this they could however be a single test, but then you don't get to run them independently which was sort of useful while debugging. ==================== Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents b456005 + bdbca46 commit 417fa6d

File tree

3 files changed

+236
-4
lines changed

3 files changed

+236
-4
lines changed

include/linux/skmsg.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ struct sk_psock {
100100
void (*saved_close)(struct sock *sk, long timeout);
101101
void (*saved_write_space)(struct sock *sk);
102102
void (*saved_data_ready)(struct sock *sk);
103+
/* psock_update_sk_prot may be called with restore=false many times
104+
* so the handler must be safe for this case. It will be called
105+
* exactly once with restore=true when the psock is being destroyed
106+
* and psock refcnt is zero, but before an RCU grace period.
107+
*/
103108
int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock,
104109
bool restore);
105110
struct proto *sk_proto;

net/unix/unix_bpf.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,30 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r
161161
{
162162
struct sock *sk_pair;
163163

164+
/* Restore does not decrement the sk_pair reference yet because we must
165+
* keep the a reference to the socket until after an RCU grace period
166+
* and any pending sends have completed.
167+
*/
164168
if (restore) {
165169
sk->sk_write_space = psock->saved_write_space;
166170
sock_replace_proto(sk, psock->sk_proto);
167171
return 0;
168172
}
169173

170-
sk_pair = unix_peer(sk);
171-
sock_hold(sk_pair);
172-
psock->sk_pair = sk_pair;
174+
/* psock_update_sk_prot can be called multiple times if psock is
175+
* added to multiple maps and/or slots in the same map. There is
176+
* also an edge case where replacing a psock with itself can trigger
177+
* an extra psock_update_sk_prot during the insert process. So it
178+
* must be safe to do multiple calls. Here we need to ensure we don't
179+
* increment the refcnt through sock_hold many times. There will only
180+
* be a single matching destroy operation.
181+
*/
182+
if (!psock->sk_pair) {
183+
sk_pair = unix_peer(sk);
184+
sock_hold(sk_pair);
185+
psock->sk_pair = sk_pair;
186+
}
187+
173188
unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
174189
sock_replace_proto(sk, &unix_stream_bpf_prot);
175190
return 0;

tools/testing/selftests/bpf/prog_tests/sockmap_basic.c

Lines changed: 213 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,213 @@ static void test_sockmap_unconnected_unix(void)
555555
close(dgram);
556556
}
557557

558+
static void test_sockmap_many_socket(void)
559+
{
560+
struct test_sockmap_pass_prog *skel;
561+
int stream[2], dgram, udp, tcp;
562+
int i, err, map, entry = 0;
563+
564+
skel = test_sockmap_pass_prog__open_and_load();
565+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
566+
return;
567+
568+
map = bpf_map__fd(skel->maps.sock_map_rx);
569+
570+
dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
571+
if (dgram < 0) {
572+
test_sockmap_pass_prog__destroy(skel);
573+
return;
574+
}
575+
576+
tcp = connected_socket_v4();
577+
if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
578+
close(dgram);
579+
test_sockmap_pass_prog__destroy(skel);
580+
return;
581+
}
582+
583+
udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
584+
if (udp < 0) {
585+
close(dgram);
586+
close(tcp);
587+
test_sockmap_pass_prog__destroy(skel);
588+
return;
589+
}
590+
591+
err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
592+
ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
593+
if (err)
594+
goto out;
595+
596+
for (i = 0; i < 2; i++, entry++) {
597+
err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY);
598+
ASSERT_OK(err, "bpf_map_update_elem(stream)");
599+
}
600+
for (i = 0; i < 2; i++, entry++) {
601+
err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY);
602+
ASSERT_OK(err, "bpf_map_update_elem(dgram)");
603+
}
604+
for (i = 0; i < 2; i++, entry++) {
605+
err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY);
606+
ASSERT_OK(err, "bpf_map_update_elem(udp)");
607+
}
608+
for (i = 0; i < 2; i++, entry++) {
609+
err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY);
610+
ASSERT_OK(err, "bpf_map_update_elem(tcp)");
611+
}
612+
for (entry--; entry >= 0; entry--) {
613+
err = bpf_map_delete_elem(map, &entry);
614+
ASSERT_OK(err, "bpf_map_delete_elem(entry)");
615+
}
616+
617+
close(stream[0]);
618+
close(stream[1]);
619+
out:
620+
close(dgram);
621+
close(tcp);
622+
close(udp);
623+
test_sockmap_pass_prog__destroy(skel);
624+
}
625+
626+
static void test_sockmap_many_maps(void)
627+
{
628+
struct test_sockmap_pass_prog *skel;
629+
int stream[2], dgram, udp, tcp;
630+
int i, err, map[2], entry = 0;
631+
632+
skel = test_sockmap_pass_prog__open_and_load();
633+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
634+
return;
635+
636+
map[0] = bpf_map__fd(skel->maps.sock_map_rx);
637+
map[1] = bpf_map__fd(skel->maps.sock_map_tx);
638+
639+
dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
640+
if (dgram < 0) {
641+
test_sockmap_pass_prog__destroy(skel);
642+
return;
643+
}
644+
645+
tcp = connected_socket_v4();
646+
if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
647+
close(dgram);
648+
test_sockmap_pass_prog__destroy(skel);
649+
return;
650+
}
651+
652+
udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
653+
if (udp < 0) {
654+
close(dgram);
655+
close(tcp);
656+
test_sockmap_pass_prog__destroy(skel);
657+
return;
658+
}
659+
660+
err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
661+
ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
662+
if (err)
663+
goto out;
664+
665+
for (i = 0; i < 2; i++, entry++) {
666+
err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY);
667+
ASSERT_OK(err, "bpf_map_update_elem(stream)");
668+
}
669+
for (i = 0; i < 2; i++, entry++) {
670+
err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY);
671+
ASSERT_OK(err, "bpf_map_update_elem(dgram)");
672+
}
673+
for (i = 0; i < 2; i++, entry++) {
674+
err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY);
675+
ASSERT_OK(err, "bpf_map_update_elem(udp)");
676+
}
677+
for (i = 0; i < 2; i++, entry++) {
678+
err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY);
679+
ASSERT_OK(err, "bpf_map_update_elem(tcp)");
680+
}
681+
for (entry--; entry >= 0; entry--) {
682+
err = bpf_map_delete_elem(map[1], &entry);
683+
entry--;
684+
ASSERT_OK(err, "bpf_map_delete_elem(entry)");
685+
err = bpf_map_delete_elem(map[0], &entry);
686+
ASSERT_OK(err, "bpf_map_delete_elem(entry)");
687+
}
688+
689+
close(stream[0]);
690+
close(stream[1]);
691+
out:
692+
close(dgram);
693+
close(tcp);
694+
close(udp);
695+
test_sockmap_pass_prog__destroy(skel);
696+
}
697+
698+
static void test_sockmap_same_sock(void)
699+
{
700+
struct test_sockmap_pass_prog *skel;
701+
int stream[2], dgram, udp, tcp;
702+
int i, err, map, zero = 0;
703+
704+
skel = test_sockmap_pass_prog__open_and_load();
705+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
706+
return;
707+
708+
map = bpf_map__fd(skel->maps.sock_map_rx);
709+
710+
dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
711+
if (dgram < 0) {
712+
test_sockmap_pass_prog__destroy(skel);
713+
return;
714+
}
715+
716+
tcp = connected_socket_v4();
717+
if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
718+
close(dgram);
719+
test_sockmap_pass_prog__destroy(skel);
720+
return;
721+
}
722+
723+
udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
724+
if (udp < 0) {
725+
close(dgram);
726+
close(tcp);
727+
test_sockmap_pass_prog__destroy(skel);
728+
return;
729+
}
730+
731+
err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
732+
ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
733+
if (err)
734+
goto out;
735+
736+
for (i = 0; i < 2; i++) {
737+
err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
738+
ASSERT_OK(err, "bpf_map_update_elem(stream)");
739+
}
740+
for (i = 0; i < 2; i++) {
741+
err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
742+
ASSERT_OK(err, "bpf_map_update_elem(dgram)");
743+
}
744+
for (i = 0; i < 2; i++) {
745+
err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY);
746+
ASSERT_OK(err, "bpf_map_update_elem(udp)");
747+
}
748+
for (i = 0; i < 2; i++) {
749+
err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY);
750+
ASSERT_OK(err, "bpf_map_update_elem(tcp)");
751+
}
752+
753+
err = bpf_map_delete_elem(map, &zero);
754+
ASSERT_OK(err, "bpf_map_delete_elem(entry)");
755+
756+
close(stream[0]);
757+
close(stream[1]);
758+
out:
759+
close(dgram);
760+
close(tcp);
761+
close(udp);
762+
test_sockmap_pass_prog__destroy(skel);
763+
}
764+
558765
void test_sockmap_basic(void)
559766
{
560767
if (test__start_subtest("sockmap create_update_free"))
@@ -597,7 +804,12 @@ void test_sockmap_basic(void)
597804
test_sockmap_skb_verdict_fionread(false);
598805
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
599806
test_sockmap_skb_verdict_peek();
600-
601807
if (test__start_subtest("sockmap unconnected af_unix"))
602808
test_sockmap_unconnected_unix();
809+
if (test__start_subtest("sockmap one socket to many map entries"))
810+
test_sockmap_many_socket();
811+
if (test__start_subtest("sockmap one socket to many maps"))
812+
test_sockmap_many_maps();
813+
if (test__start_subtest("sockmap same socket replace"))
814+
test_sockmap_same_sock();
603815
}

0 commit comments

Comments
 (0)