Skip to content

Commit e95c6cf

Browse files
committed
Merge branch 'sockmap-fixes'
John Fastabend says:

====================
sockmap fixes for net

The following implements a set of fixes for sockmap and changes the API slightly in a few places to reduce preempt_disable/enable scope. We do this here in net because it requires an API change and this avoids getting stuck with legacy API going forward.

The short description:

Access to skb mark is removed; it is problematic when we add features in the future because mark is a union and used by the TCP/socket code internally. We don't want to expose this to the BPF programs or let programs change the values.

The other change is caching metadata in the skb itself between when the BPF program returns a redirect code and the core code implements the redirect. This avoids having per cpu metadata.

Finally, tighten restriction on using sockmap to CAP_NET_ADMIN and only SOCK_STREAM sockets.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 1cc276c + 9ef2a8c commit e95c6cf

File tree

11 files changed

+74
-34
lines changed

11 files changed

+74
-34
lines changed

include/linux/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
728728
void bpf_warn_invalid_xdp_action(u32 act);
729729
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
730730

731-
struct sock *do_sk_redirect_map(void);
731+
struct sock *do_sk_redirect_map(struct sk_buff *skb);
732732

733733
#ifdef CONFIG_BPF_JIT
734734
extern int bpf_jit_enable;

include/net/tcp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,11 @@ struct tcp_skb_cb {
840840
struct inet6_skb_parm h6;
841841
#endif
842842
} header; /* For incoming skbs */
843+
struct {
844+
__u32 key;
845+
__u32 flags;
846+
struct bpf_map *map;
847+
} bpf;
843848
};
844849
};
845850

kernel/bpf/devmap.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
7878
int err = -EINVAL;
7979
u64 cost;
8080

81+
if (!capable(CAP_NET_ADMIN))
82+
return ERR_PTR(-EPERM);
83+
8184
/* check sanity of attributes */
8285
if (attr->max_entries == 0 || attr->key_size != 4 ||
8386
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)

kernel/bpf/sockmap.c

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <linux/workqueue.h>
4040
#include <linux/list.h>
4141
#include <net/strparser.h>
42+
#include <net/tcp.h>
4243

4344
struct bpf_stab {
4445
struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
101102
return SK_DROP;
102103

103104
skb_orphan(skb);
105+
/* We need to ensure that BPF metadata for maps is also cleared
106+
* when we orphan the skb so that we don't have the possibility
107+
* to reference a stale map.
108+
*/
109+
TCP_SKB_CB(skb)->bpf.map = NULL;
104110
skb->sk = psock->sock;
105111
bpf_compute_data_end(skb);
112+
preempt_disable();
106113
rc = (*prog->bpf_func)(skb, prog->insnsi);
114+
preempt_enable();
107115
skb->sk = NULL;
108116

109117
return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
114122
struct sock *sk;
115123
int rc;
116124

117-
/* Because we use per cpu values to feed input from sock redirect
118-
* in BPF program to do_sk_redirect_map() call we need to ensure we
119-
* are not preempted. RCU read lock is not sufficient in this case
120-
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
121-
*/
122-
preempt_disable();
123125
rc = smap_verdict_func(psock, skb);
124126
switch (rc) {
125127
case SK_REDIRECT:
126-
sk = do_sk_redirect_map();
127-
preempt_enable();
128+
sk = do_sk_redirect_map(skb);
128129
if (likely(sk)) {
129130
struct smap_psock *peer = smap_psock_sk(sk);
130131

@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
141142
/* Fall through and free skb otherwise */
142143
case SK_DROP:
143144
default:
144-
if (rc != SK_REDIRECT)
145-
preempt_enable();
146145
kfree_skb(skb);
147146
}
148147
}
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
487486
int err = -EINVAL;
488487
u64 cost;
489488

489+
if (!capable(CAP_NET_ADMIN))
490+
return ERR_PTR(-EPERM);
491+
490492
/* check sanity of attributes */
491493
if (attr->max_entries == 0 || attr->key_size != 4 ||
492494
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
840842
return -EINVAL;
841843
}
842844

845+
if (skops.sk->sk_type != SOCK_STREAM ||
846+
skops.sk->sk_protocol != IPPROTO_TCP) {
847+
fput(socket->file);
848+
return -EOPNOTSUPP;
849+
}
850+
843851
err = sock_map_ctx_update_elem(&skops, map, key, flags);
844852
fput(socket->file);
845853
return err;

net/core/filter.c

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
18391839
.arg2_type = ARG_ANYTHING,
18401840
};
18411841

1842-
BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
1842+
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
1843+
struct bpf_map *, map, u32, key, u64, flags)
18431844
{
1844-
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1845+
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
18451846

18461847
if (unlikely(flags))
18471848
return SK_ABORTED;
18481849

1849-
ri->ifindex = key;
1850-
ri->flags = flags;
1851-
ri->map = map;
1850+
tcb->bpf.key = key;
1851+
tcb->bpf.flags = flags;
1852+
tcb->bpf.map = map;
18521853

18531854
return SK_REDIRECT;
18541855
}
18551856

1856-
struct sock *do_sk_redirect_map(void)
1857+
struct sock *do_sk_redirect_map(struct sk_buff *skb)
18571858
{
1858-
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1859+
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
18591860
struct sock *sk = NULL;
18601861

1861-
if (ri->map) {
1862-
sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
1862+
if (tcb->bpf.map) {
1863+
sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
18631864

1864-
ri->ifindex = 0;
1865-
ri->map = NULL;
1866-
/* we do not clear flags for future lookup */
1865+
tcb->bpf.key = 0;
1866+
tcb->bpf.map = NULL;
18671867
}
18681868

18691869
return sk;
@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
18731873
.func = bpf_sk_redirect_map,
18741874
.gpl_only = false,
18751875
.ret_type = RET_INTEGER,
1876-
.arg1_type = ARG_CONST_MAP_PTR,
1877-
.arg2_type = ARG_ANYTHING,
1876+
.arg1_type = ARG_PTR_TO_CTX,
1877+
.arg2_type = ARG_CONST_MAP_PTR,
18781878
.arg3_type = ARG_ANYTHING,
1879+
.arg4_type = ARG_ANYTHING,
18791880
};
18801881

18811882
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size,
36833684
{
36843685
if (type == BPF_WRITE) {
36853686
switch (off) {
3686-
case bpf_ctx_range(struct __sk_buff, mark):
36873687
case bpf_ctx_range(struct __sk_buff, tc_index):
36883688
case bpf_ctx_range(struct __sk_buff, priority):
36893689
break;
@@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size,
36933693
}
36943694

36953695
switch (off) {
3696+
case bpf_ctx_range(struct __sk_buff, mark):
36963697
case bpf_ctx_range(struct __sk_buff, tc_classid):
36973698
return false;
36983699
case bpf_ctx_range(struct __sk_buff, data):

samples/sockmap/sockmap_kern.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
6262
ret = 1;
6363

6464
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
65-
return bpf_sk_redirect_map(&sock_map, ret, 0);
65+
return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
6666
}
6767

6868
SEC("sockops")

tools/include/uapi/linux/bpf.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,9 +569,10 @@ union bpf_attr {
569569
* @flags: reserved for future use
570570
* Return: 0 on success or negative error code
571571
*
572-
* int bpf_sk_redirect_map(map, key, flags)
572+
* int bpf_sk_redirect_map(skb, map, key, flags)
573573
* Redirect skb to a sock in map using key as a lookup key for the
574574
* sock in map.
575+
* @skb: pointer to skb
575576
* @map: pointer to sockmap
576577
* @key: key to lookup sock in map
577578
* @flags: reserved for future use

tools/testing/selftests/bpf/bpf_helpers.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
6565
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
6666
int optlen) =
6767
(void *) BPF_FUNC_setsockopt;
68-
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
68+
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
6969
(void *) BPF_FUNC_sk_redirect_map;
7070
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
7171
unsigned long long flags) =

tools/testing/selftests/bpf/sockmap_verdict_prog.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
6161
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
6262

6363
if (!map)
64-
return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
65-
return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
64+
return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
65+
return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
6666
}
6767

6868
char _license[] SEC("license") = "GPL";

tools/testing/selftests/bpf/test_maps.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data)
466466
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
467467
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
468468
int ports[] = {50200, 50201, 50202, 50204};
469-
int err, i, fd, sfd[6] = {0xdeadbeef};
469+
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
470470
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
471471
int parse_prog, verdict_prog;
472472
struct sockaddr_in addr;
@@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data)
548548
goto out_sockmap;
549549
}
550550

551+
/* Test update with unsupported UDP socket */
552+
udp = socket(AF_INET, SOCK_DGRAM, 0);
553+
i = 0;
554+
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
555+
if (!err) {
556+
printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
557+
i, udp);
558+
goto out_sockmap;
559+
}
560+
551561
/* Test update without programs */
552562
for (i = 0; i < 6; i++) {
553563
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);

tools/testing/selftests/bpf/test_verifier.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = {
11301130
.errstr = "invalid bpf_context access",
11311131
},
11321132
{
1133-
"check skb->mark is writeable by SK_SKB",
1133+
"invalid access of skb->mark for SK_SKB",
1134+
.insns = {
1135+
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1136+
offsetof(struct __sk_buff, mark)),
1137+
BPF_EXIT_INSN(),
1138+
},
1139+
.result = REJECT,
1140+
.prog_type = BPF_PROG_TYPE_SK_SKB,
1141+
.errstr = "invalid bpf_context access",
1142+
},
1143+
{
1144+
"check skb->mark is not writeable by SK_SKB",
11341145
.insns = {
11351146
BPF_MOV64_IMM(BPF_REG_0, 0),
11361147
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
11371148
offsetof(struct __sk_buff, mark)),
11381149
BPF_EXIT_INSN(),
11391150
},
1140-
.result = ACCEPT,
1151+
.result = REJECT,
11411152
.prog_type = BPF_PROG_TYPE_SK_SKB,
1153+
.errstr = "invalid bpf_context access",
11421154
},
11431155
{
11441156
"check skb->tc_index is writeable by SK_SKB",

0 commit comments

Comments
 (0)