Skip to content
This repository was archived by the owner on Oct 5, 2018. It is now read-only.

Commit 244b7f4

Browse files
committed
Merge branch 'ip6tunnel_dst'
Martin KaFai Lau says:

====================
ipv6: Fix dst_entry refcnt bugs in ip6_tunnel

v4:
- Fix a compilation error in patch 5 when CONFIG_LOCKDEP is turned on and re-test it

v3:
- Merge a 'if else if' test in patch 4
- Use rcu_dereference_protected in patch 5 to fix a sparse check when CONFIG_SPARSE_RCU_POINTER is enabled

v2:
- Add patch 4 and 5 to remove the spinlock

v1:
This patch series is to fix the dst refcnt bugs in ip6_tunnel.

Patch 1 and 2 are the prep works. Patch 3 is the fix.

I can reproduce the bug by adding and removing the ip6gre tunnel while running a super_netperf TCP_CRR test. I get the following trace by adding WARN_ON_ONCE(newrefcnt < 0) to dst_release():

[ 312.760432] ------------[ cut here ]------------
[ 312.774664] WARNING: CPU: 2 PID: 10263 at net/core/dst.c:288 dst_release+0xf3/0x100()
[ 312.776041] Modules linked in: k10temp coretemp hwmon ip6_gre ip6_tunnel tunnel6 ipmi_devintf ipmi_msghandler ip6table_filter ip6_tables xt_NFLOG nfnetlink_log nfnetlink xt_comment xt_statistic iptable_filter ip_tables x_tables nfsv3 nfs_acl nfs fscache lockd grace mptctl netconsole autofs4 rpcsec_gss_krb5 auth_rpcgss oid_registry sunrpc ipv6 dm_mod loop iTCO_wdt iTCO_vendor_support serio_raw rtc_cmos pcspkr i2c_i801 i2c_core lpc_ich mfd_core ehci_pci ehci_hcd e1000e mlx4_en ptp pps_core vxlan udp_tunnel ip6_udp_tunnel mlx4_core sg button ext3 jbd mpt2sas raid_class
[ 312.785302] CPU: 2 PID: 10263 Comm: netperf Not tainted 4.2.0-rc8-00046-g4db9b63-dirty #15
[ 312.791695] Hardware name: Quanta Freedom /Windmill-EP, BIOS F03_3B04 09/12/2013
[ 312.792965] ffffffff819dca2c ffff8811dfbdf6f8 ffffffff816537de ffff88123788fdb8
[ 312.794263] 0000000000000000 ffff8811dfbdf738 ffffffff81052646 ffff8811dfbdf768
[ 312.795593] ffff881203a98180 00000000ffffffff ffff88242927a000 ffff88120a2532e0
[ 312.796946] Call Trace:
[ 312.797380] [<ffffffff816537de>] dump_stack+0x45/0x57
[ 312.798288] [<ffffffff81052646>] warn_slowpath_common+0x86/0xc0
[ 312.799699] [<ffffffff8105273a>] warn_slowpath_null+0x1a/0x20
[ 312.800852] [<ffffffff8159f9b3>] dst_release+0xf3/0x100
[ 312.801834] [<ffffffffa03f1308>] ip6_tnl_dst_store+0x48/0x70 [ip6_tunnel]
[ 312.803738] [<ffffffffa03fd0b6>] ip6gre_xmit2+0x536/0x720 [ip6_gre]
[ 312.804774] [<ffffffffa03fd40a>] ip6gre_tunnel_xmit+0x16a/0x410 [ip6_gre]
[ 312.805986] [<ffffffff8159934b>] dev_hard_start_xmit+0x23b/0x390
[ 312.808810] [<ffffffff815a2f5f>] ? neigh_destroy+0xef/0x140
[ 312.809843] [<ffffffff81599a6c>] __dev_queue_xmit+0x48c/0x4f0
[ 312.813931] [<ffffffff81599ae3>] dev_queue_xmit_sk+0x13/0x20
[ 312.814993] [<ffffffff815a0832>] neigh_direct_output+0x12/0x20
[ 312.817448] [<ffffffffa021d633>] ip6_finish_output2+0x183/0x460 [ipv6]
[ 312.818762] [<ffffffff81306fc5>] ? find_next_bit+0x15/0x20
[ 312.819671] [<ffffffffa021fd79>] ip6_finish_output+0x89/0xe0 [ipv6]
[ 312.820720] [<ffffffffa021fe14>] ip6_output+0x44/0xe0 [ipv6]
[ 312.821762] [<ffffffff815c8809>] ? nf_hook_slow+0x69/0xc0
[ 312.823123] [<ffffffffa021d232>] ip6_xmit+0x242/0x4c0 [ipv6]
[ 312.824073] [<ffffffffa021c9f0>] ? ac6_proc_exit+0x20/0x20 [ipv6]
[ 312.825116] [<ffffffffa024c751>] inet6_csk_xmit+0x61/0xa0 [ipv6]
[ 312.826127] [<ffffffff815eb590>] tcp_transmit_skb+0x4f0/0x9b0
[ 312.827441] [<ffffffff815ed267>] tcp_connect+0x637/0x7a0
[ 312.828327] [<ffffffffa0245906>] tcp_v6_connect+0x2d6/0x550 [ipv6]
[ 312.829581] [<ffffffff81606f05>] __inet_stream_connect+0x95/0x2f0
[ 312.830600] [<ffffffff810ae13a>] ? hrtimer_try_to_cancel+0x1a/0xf0
[ 312.833456] [<ffffffff812fba19>] ? timerqueue_add+0x59/0xb0
[ 312.834407] [<ffffffff81607198>] inet_stream_connect+0x38/0x50
[ 312.835886] [<ffffffff8157cb17>] SYSC_connect+0xb7/0xf0
[ 312.840035] [<ffffffff810af6d3>] ? do_setitimer+0x1b3/0x200
[ 312.840983] [<ffffffff810af75a>] ? alarm_setitimer+0x3a/0x70
[ 312.841941] [<ffffffff8157d7ae>] SyS_connect+0xe/0x10
[ 312.842818] [<ffffffff81659297>] entry_SYSCALL_64_fastpath+0x12/0x6a
[ 312.844206] ---[ end trace 43f3ecd86c3b1313 ]---
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e8684c8 + 70da5b5 commit 244b7f4

File tree

6 files changed

+179
-78
lines changed

6 files changed

+179
-78
lines changed

include/net/ip6_tunnel.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,20 @@ struct __ip6_tnl_parm {
3232
__be32 o_key;
3333
};
3434

35+
struct ip6_tnl_dst {
36+
seqlock_t lock;
37+
struct dst_entry __rcu *dst;
38+
u32 cookie;
39+
};
40+
3541
/* IPv6 tunnel */
3642
struct ip6_tnl {
3743
struct ip6_tnl __rcu *next; /* next tunnel in list */
3844
struct net_device *dev; /* virtual device associated with tunnel */
3945
struct net *net; /* netns for packet i/o */
4046
struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
4147
struct flowi fl; /* flowi template for xmit */
42-
struct dst_entry *dst_cache; /* cached dst */
43-
u32 dst_cookie;
48+
struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */
4449

4550
int err_count;
4651
unsigned long err_time;
@@ -60,9 +65,11 @@ struct ipv6_tlv_tnl_enc_lim {
6065
__u8 encap_limit; /* tunnel encapsulation limit */
6166
} __packed;
6267

63-
struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t);
68+
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
69+
int ip6_tnl_dst_init(struct ip6_tnl *t);
70+
void ip6_tnl_dst_destroy(struct ip6_tnl *t);
6471
void ip6_tnl_dst_reset(struct ip6_tnl *t);
65-
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst);
72+
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
6673
int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
6774
const struct in6_addr *raddr);
6875
int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,

net/ipv6/addrconf.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5127,13 +5127,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
51275127

51285128
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
51295129
ifp->idev->dev, 0, 0);
5130-
if (rt && ip6_del_rt(rt))
5131-
dst_free(&rt->dst);
5130+
if (rt)
5131+
ip6_del_rt(rt);
51325132
}
51335133
dst_hold(&ifp->rt->dst);
51345134

5135-
if (ip6_del_rt(ifp->rt))
5136-
dst_free(&ifp->rt->dst);
5135+
ip6_del_rt(ifp->rt);
51375136

51385137
rt_genid_bump_ipv6(net);
51395138
break;

net/ipv6/ip6_fib.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ static void node_free(struct fib6_node *fn)
155155
kmem_cache_free(fib6_node_kmem, fn);
156156
}
157157

158+
static void rt6_rcu_free(struct rt6_info *rt)
159+
{
160+
call_rcu(&rt->dst.rcu_head, dst_rcu_free);
161+
}
162+
158163
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
159164
{
160165
int cpu;
@@ -169,7 +174,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
169174
ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
170175
pcpu_rt = *ppcpu_rt;
171176
if (pcpu_rt) {
172-
dst_free(&pcpu_rt->dst);
177+
rt6_rcu_free(pcpu_rt);
173178
*ppcpu_rt = NULL;
174179
}
175180
}
@@ -181,7 +186,7 @@ static void rt6_release(struct rt6_info *rt)
181186
{
182187
if (atomic_dec_and_test(&rt->rt6i_ref)) {
183188
rt6_free_pcpu(rt);
184-
dst_free(&rt->dst);
189+
rt6_rcu_free(rt);
185190
}
186191
}
187192

@@ -933,6 +938,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
933938
int replace_required = 0;
934939
int sernum = fib6_new_sernum(info->nl_net);
935940

941+
if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) &&
942+
!atomic_read(&rt->dst.__refcnt)))
943+
return -EINVAL;
944+
936945
if (info->nlh) {
937946
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
938947
allow_create = 0;
@@ -1025,6 +1034,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
10251034
fib6_start_gc(info->nl_net, rt);
10261035
if (!(rt->rt6i_flags & RTF_CACHE))
10271036
fib6_prune_clones(info->nl_net, pn);
1037+
rt->dst.flags &= ~DST_NOCACHE;
10281038
}
10291039

10301040
out:
@@ -1049,7 +1059,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
10491059
atomic_inc(&pn->leaf->rt6i_ref);
10501060
}
10511061
#endif
1052-
dst_free(&rt->dst);
1062+
if (!(rt->dst.flags & DST_NOCACHE))
1063+
dst_free(&rt->dst);
10531064
}
10541065
return err;
10551066

@@ -1060,7 +1071,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
10601071
st_failure:
10611072
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
10621073
fib6_repair_tree(info->nl_net, fn);
1063-
dst_free(&rt->dst);
1074+
if (!(rt->dst.flags & DST_NOCACHE))
1075+
dst_free(&rt->dst);
10641076
return err;
10651077
#endif
10661078
}

net/ipv6/ip6_gre.c

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -634,20 +634,20 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
634634
}
635635

636636
if (!fl6->flowi6_mark)
637-
dst = ip6_tnl_dst_check(tunnel);
637+
dst = ip6_tnl_dst_get(tunnel);
638638

639639
if (!dst) {
640-
ndst = ip6_route_output(net, NULL, fl6);
640+
dst = ip6_route_output(net, NULL, fl6);
641641

642-
if (ndst->error)
642+
if (dst->error)
643643
goto tx_err_link_failure;
644-
ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
645-
if (IS_ERR(ndst)) {
646-
err = PTR_ERR(ndst);
647-
ndst = NULL;
644+
dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
645+
if (IS_ERR(dst)) {
646+
err = PTR_ERR(dst);
647+
dst = NULL;
648648
goto tx_err_link_failure;
649649
}
650-
dst = ndst;
650+
ndst = dst;
651651
}
652652

653653
tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
702702
skb = new_skb;
703703
}
704704

705-
if (fl6->flowi6_mark) {
706-
skb_dst_set(skb, dst);
707-
ndst = NULL;
708-
} else {
709-
skb_dst_set_noref(skb, dst);
710-
}
705+
if (!fl6->flowi6_mark && ndst)
706+
ip6_tnl_dst_set(tunnel, ndst);
707+
skb_dst_set(skb, dst);
711708

712709
proto = NEXTHDR_GRE;
713710
if (encap_limit >= 0) {
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
762759
skb_set_inner_protocol(skb, protocol);
763760

764761
ip6tunnel_xmit(NULL, skb, dev);
765-
if (ndst)
766-
ip6_tnl_dst_store(tunnel, ndst);
767762
return 0;
768763
tx_err_link_failure:
769764
stats->tx_carrier_errors++;
770765
dst_link_failure(skb);
771766
tx_err_dst_release:
772-
dst_release(ndst);
767+
dst_release(dst);
773768
return err;
774769
}
775770

@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
12231218

12241219
static void ip6gre_dev_free(struct net_device *dev)
12251220
{
1221+
struct ip6_tnl *t = netdev_priv(dev);
1222+
1223+
ip6_tnl_dst_destroy(t);
12261224
free_percpu(dev->tstats);
12271225
free_netdev(dev);
12281226
}
@@ -1245,26 +1243,48 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
12451243
netif_keep_dst(dev);
12461244
}
12471245

1248-
static int ip6gre_tunnel_init(struct net_device *dev)
1246+
static int ip6gre_tunnel_init_common(struct net_device *dev)
12491247
{
12501248
struct ip6_tnl *tunnel;
1249+
int ret;
12511250

12521251
tunnel = netdev_priv(dev);
12531252

12541253
tunnel->dev = dev;
12551254
tunnel->net = dev_net(dev);
12561255
strcpy(tunnel->parms.name, dev->name);
12571256

1257+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1258+
if (!dev->tstats)
1259+
return -ENOMEM;
1260+
1261+
ret = ip6_tnl_dst_init(tunnel);
1262+
if (ret) {
1263+
free_percpu(dev->tstats);
1264+
dev->tstats = NULL;
1265+
return ret;
1266+
}
1267+
1268+
return 0;
1269+
}
1270+
1271+
static int ip6gre_tunnel_init(struct net_device *dev)
1272+
{
1273+
struct ip6_tnl *tunnel;
1274+
int ret;
1275+
1276+
ret = ip6gre_tunnel_init_common(dev);
1277+
if (ret)
1278+
return ret;
1279+
1280+
tunnel = netdev_priv(dev);
1281+
12581282
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
12591283
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
12601284

12611285
if (ipv6_addr_any(&tunnel->parms.raddr))
12621286
dev->header_ops = &ip6gre_header_ops;
12631287

1264-
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1265-
if (!dev->tstats)
1266-
return -ENOMEM;
1267-
12681288
return 0;
12691289
}
12701290

@@ -1460,19 +1480,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
14601480
static int ip6gre_tap_init(struct net_device *dev)
14611481
{
14621482
struct ip6_tnl *tunnel;
1483+
int ret;
14631484

1464-
tunnel = netdev_priv(dev);
1485+
ret = ip6gre_tunnel_init_common(dev);
1486+
if (ret)
1487+
return ret;
14651488

1466-
tunnel->dev = dev;
1467-
tunnel->net = dev_net(dev);
1468-
strcpy(tunnel->parms.name, dev->name);
1489+
tunnel = netdev_priv(dev);
14691490

14701491
ip6gre_tnl_link_config(tunnel, 1);
14711492

1472-
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1473-
if (!dev->tstats)
1474-
return -ENOMEM;
1475-
14761493
return 0;
14771494
}
14781495

0 commit comments

Comments
 (0)