Skip to content

Commit d7a39d3

Browse files
committed
Merge branch 'bridge-mdb-bulk-delete'
Ido Schimmel says:

====================
Add MDB bulk deletion support

This patchset adds MDB bulk deletion support, allowing user space to request the deletion of matching entries instead of dumping the entire MDB and issuing a separate deletion request for each matching entry.

Support is added in both the bridge and VXLAN drivers in a similar fashion to the existing FDB bulk deletion support.

The parameters according to which bulk deletion can be performed are similar to the FDB ones, namely: Destination port, VLAN ID, state (e.g., "permanent"), routing protocol, source / destination VNI, destination IP and UDP port. Flushing based on flags (e.g., "offload", "fast_leave", "added_by_star_ex", "blocked") is not currently supported, but can be added in the future, if a use case arises.

Patch #1 adds a new uAPI attribute to allow specifying the state mask according to which bulk deletion will be performed, if any.

Patch #2 adds a new policy according to which bulk deletion requests (with 'NLM_F_BULK' flag set) will be parsed.

Patches #3-#4 add a new NDO for MDB bulk deletion and invoke it from the rtnetlink code when a bulk deletion request is made.

Patches #5-#6 implement the MDB bulk deletion NDO in the bridge and VXLAN drivers, respectively.

Patch #7 allows user space to issue MDB bulk deletion requests by no longer rejecting the 'NLM_F_BULK' flag when it is set in 'RTM_DELMDB' requests.

Patches #8-#9 add selftests for both drivers, for both good and bad flows.

iproute2 changes can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/mdb_flush_v1
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents b6895d0 + c3e87a7 commit d7a39d3

File tree

11 files changed

+749
-31
lines changed

11 files changed

+749
-31
lines changed

drivers/net/vxlan/vxlan_core.c

+1
Original file line numberDiff line numberDiff line change
@@ -3235,6 +3235,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
32353235
.ndo_fdb_get = vxlan_fdb_get,
32363236
.ndo_mdb_add = vxlan_mdb_add,
32373237
.ndo_mdb_del = vxlan_mdb_del,
3238+
.ndo_mdb_del_bulk = vxlan_mdb_del_bulk,
32383239
.ndo_mdb_dump = vxlan_mdb_dump,
32393240
.ndo_mdb_get = vxlan_mdb_get,
32403241
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,

drivers/net/vxlan/vxlan_mdb.c

+150-24
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ struct vxlan_mdb_config {
7474
u8 rt_protocol;
7575
};
7676

77+
/*
 * Filter describing which MDB remotes a flush should remove.
 *
 * The flush code in this file treats a zero-valued field as "do not filter
 * on this attribute"; for remote_ip the test is on sa.sa_family being zero.
 * An all-zero descriptor therefore matches every remote of every entry.
 */
struct vxlan_mdb_flush_desc {
78+
/* Remote (destination) IP address to match. */
union vxlan_addr remote_ip;
79+
/* Source VNI, compared against the MDB entry key's VNI. */
__be32 src_vni;
80+
/* Remote VNI, compared against the remote's effective VNI. */
__be32 remote_vni;
81+
/* Remote UDP port, stored in network byte order. */
__be16 remote_port;
82+
/* Routing protocol that installed the remote. */
u8 rt_protocol;
83+
};
84+
7785
static const struct rhashtable_params vxlan_mdb_rht_params = {
7886
.head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
7987
.key_offset = offsetof(struct vxlan_mdb_entry, key),
@@ -1306,6 +1314,145 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
13061314
return err;
13071315
}
13081316

1317+
/*
 * Netlink policy used by vxlan_mdb_flush_desc_init() to parse the nested
 * MDBA_SET_ENTRY_ATTRS attribute of a bulk deletion request.
 *
 * Only the attributes listed here have an explicit policy entry:
 * routing protocol (minimum RTPROT_STATIC), destination address (binary,
 * length between an IPv4 and an IPv6 address), destination UDP port,
 * remote and source VNI (both bounded by vni_range) and a state mask
 * limited to the MDB_PERMANENT bit.
 */
static const struct nla_policy
1318+
vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
1319+
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
1320+
[MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
1321+
sizeof(struct in_addr),
1322+
sizeof(struct in6_addr)),
1323+
[MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
1324+
[MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
1325+
[MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
1326+
[MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
1327+
};
1328+
1329+
/*
 * Build a flush descriptor from the attributes of a bulk deletion request.
 *
 * @vxlan:  VXLAN device the request was issued on.
 * @desc:   descriptor to fill; only fields whose attribute is present are
 *          written, everything else is left as the caller initialised it.
 * @tb:     top-level attributes; tb[MDBA_SET_ENTRY] is dereferenced
 *          unconditionally (presumably guaranteed by the caller - confirm
 *          against the rtnetlink code), tb[MDBA_SET_ENTRY_ATTRS] is optional.
 * @extack: extended ack used to report why a request was rejected.
 *
 * Returns 0 on success, -EINVAL if the request names another net device,
 * specifies a VID, or masks on the permanent bit without asking for
 * permanent entries, or the error returned by nla_parse_nested().
 */
static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan,
1330+
struct vxlan_mdb_flush_desc *desc,
1331+
struct nlattr *tb[],
1332+
struct netlink_ext_ack *extack)
1333+
{
1334+
struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
1335+
struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
1336+
int err;
1337+
1338+
/* A non-zero ifindex must refer to this VXLAN device itself. */
if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) {
1339+
NL_SET_ERR_MSG_MOD(extack, "Invalid port net device");
1340+
return -EINVAL;
1341+
}
1342+
1343+
if (entry->vid) {
1344+
NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
1345+
return -EINVAL;
1346+
}
1347+
1348+
/* No nested attributes: nothing more to filter on. */
if (!tb[MDBA_SET_ENTRY_ATTRS])
1349+
return 0;
1350+
1351+
err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
1352+
tb[MDBA_SET_ENTRY_ATTRS],
1353+
vxlan_mdbe_attrs_del_bulk_pol, extack);
1354+
if (err)
1355+
return err;
1356+
1357+
/* The state mask is only validated here; it is not stored in @desc. */
if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) {
1358+
u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);
1359+
1360+
if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) {
1361+
NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported");
1362+
return -EINVAL;
1363+
}
1364+
}
1365+
1366+
if (mdbe_attrs[MDBE_ATTR_RTPROT])
1367+
desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
1368+
1369+
if (mdbe_attrs[MDBE_ATTR_DST])
1370+
vxlan_nla_get_addr(&desc->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);
1371+
1372+
/* Port and VNIs are converted to big-endian before being stored. */
if (mdbe_attrs[MDBE_ATTR_DST_PORT])
1373+
desc->remote_port =
1374+
cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));
1375+
1376+
if (mdbe_attrs[MDBE_ATTR_VNI])
1377+
desc->remote_vni =
1378+
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));
1379+
1380+
if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
1381+
desc->src_vni =
1382+
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
1383+
1384+
return 0;
1385+
}
1386+
1387+
/*
 * Delete every remote of @mdb_entry that matches @desc.
 *
 * Each filter in @desc (remote IP, remote VNI, remote port, routing
 * protocol) is applied only when it is non-zero; a remote must pass all
 * active filters to be removed via vxlan_mdb_remote_del(). The safe list
 * iterator is used because remotes are deleted while walking the list.
 */
static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
1388+
struct vxlan_mdb_entry *mdb_entry,
1389+
const struct vxlan_mdb_flush_desc *desc)
1390+
{
1391+
struct vxlan_mdb_remote *remote, *tmp;
1392+
1393+
list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) {
1394+
struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
1395+
__be32 remote_vni;
1396+
1397+
/* An unset address family means no remote IP filter was given. */
if (desc->remote_ip.sa.sa_family &&
1398+
!vxlan_addr_equal(&desc->remote_ip, &rd->remote_ip))
1399+
continue;
1400+
1401+
/* Encapsulation is performed with source VNI if remote VNI
1402+
* is not set.
1403+
*/
1404+
remote_vni = rd->remote_vni ? : mdb_entry->key.vni;
1405+
if (desc->remote_vni && desc->remote_vni != remote_vni)
1406+
continue;
1407+
1408+
if (desc->remote_port && desc->remote_port != rd->remote_port)
1409+
continue;
1410+
1411+
if (desc->rt_protocol &&
1412+
desc->rt_protocol != remote->rt_protocol)
1413+
continue;
1414+
1415+
vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
1416+
}
1417+
}
1418+
1419+
/*
 * Walk all MDB entries of @vxlan and flush the remotes matching @desc.
 *
 * Entries whose key VNI differs from a non-zero desc->src_vni are skipped
 * entirely. For the others, matching remotes are deleted and the entry is
 * then handed to vxlan_mdb_entry_put(). Called for bulk deletion requests
 * and, with an all-zero descriptor, from vxlan_mdb_fini().
 */
static void vxlan_mdb_flush(struct vxlan_dev *vxlan,
1420+
const struct vxlan_mdb_flush_desc *desc)
1421+
{
1422+
struct vxlan_mdb_entry *mdb_entry;
1423+
struct hlist_node *tmp;
1424+
1425+
/* The removal of an entry cannot trigger the removal of another entry
1426+
* since entries are always added to the head of the list.
1427+
*/
1428+
hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
1429+
if (desc->src_vni && desc->src_vni != mdb_entry->key.vni)
1430+
continue;
1431+
1432+
vxlan_mdb_remotes_flush(vxlan, mdb_entry, desc);
1433+
/* Entry will only be removed if its remotes list is empty. */
1434+
vxlan_mdb_entry_put(vxlan, mdb_entry);
1435+
}
1436+
}
1437+
1438+
/*
 * Bulk-delete MDB entries of a VXLAN device.
 *
 * @dev:    VXLAN net device.
 * @tb:     parsed top-level netlink attributes of the request.
 * @extack: extended ack for error reporting.
 *
 * Translates the request into a flush descriptor and removes every
 * matching remote. Returns 0 on success or the error from
 * vxlan_mdb_flush_desc_init(); nothing is flushed on error.
 */
int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
1439+
struct netlink_ext_ack *extack)
1440+
{
1441+
struct vxlan_dev *vxlan = netdev_priv(dev);
1442+
/* Zero-initialised so that absent attributes leave their filter unset. */
struct vxlan_mdb_flush_desc desc = {};
1443+
int err;
1444+
1445+
/* The caller is expected to hold RTNL. */
ASSERT_RTNL();
1446+
1447+
err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack);
1448+
if (err)
1449+
return err;
1450+
1451+
vxlan_mdb_flush(vxlan, &desc);
1452+
1453+
return 0;
1454+
}
1455+
13091456
static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
13101457
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
13111458
sizeof(struct in_addr),
@@ -1575,29 +1722,6 @@ static void vxlan_mdb_check_empty(void *ptr, void *arg)
15751722
WARN_ON_ONCE(1);
15761723
}
15771724

1578-
static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
1579-
struct vxlan_mdb_entry *mdb_entry)
1580-
{
1581-
struct vxlan_mdb_remote *remote, *tmp;
1582-
1583-
list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
1584-
vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
1585-
}
1586-
1587-
static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
1588-
{
1589-
struct vxlan_mdb_entry *mdb_entry;
1590-
struct hlist_node *tmp;
1591-
1592-
/* The removal of an entry cannot trigger the removal of another entry
1593-
* since entries are always added to the head of the list.
1594-
*/
1595-
hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
1596-
vxlan_mdb_remotes_flush(vxlan, mdb_entry);
1597-
vxlan_mdb_entry_put(vxlan, mdb_entry);
1598-
}
1599-
}
1600-
16011725
int vxlan_mdb_init(struct vxlan_dev *vxlan)
16021726
{
16031727
int err;
@@ -1613,7 +1737,9 @@ int vxlan_mdb_init(struct vxlan_dev *vxlan)
16131737

16141738
void vxlan_mdb_fini(struct vxlan_dev *vxlan)
16151739
{
1616-
vxlan_mdb_entries_flush(vxlan);
1740+
struct vxlan_mdb_flush_desc desc = {};
1741+
1742+
vxlan_mdb_flush(vxlan, &desc);
16171743
WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
16181744
rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
16191745
NULL);

drivers/net/vxlan/vxlan_private.h

+2
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
235235
struct netlink_ext_ack *extack);
236236
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
237237
struct netlink_ext_ack *extack);
238+
/* Bulk-delete MDB entries of a VXLAN device; used as .ndo_mdb_del_bulk. */
int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
239+
struct netlink_ext_ack *extack);
238240
int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
239241
u32 seq, struct netlink_ext_ack *extack);
240242
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,

include/linux/netdevice.h

+6
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,9 @@ struct netdev_net_notifier {
13291329
* int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[],
13301330
* struct netlink_ext_ack *extack);
13311331
* Deletes the MDB entry from dev.
1332+
* int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[],
1333+
* struct netlink_ext_ack *extack);
1334+
* Bulk deletes MDB entries from dev.
13321335
* int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb,
13331336
* struct netlink_callback *cb);
13341337
* Dumps MDB entries from dev. The first argument (marker) in the netlink
@@ -1611,6 +1614,9 @@ struct net_device_ops {
16111614
int (*ndo_mdb_del)(struct net_device *dev,
16121615
struct nlattr *tb[],
16131616
struct netlink_ext_ack *extack);
1617+
int (*ndo_mdb_del_bulk)(struct net_device *dev,
1618+
struct nlattr *tb[],
1619+
struct netlink_ext_ack *extack);
16141620
int (*ndo_mdb_dump)(struct net_device *dev,
16151621
struct sk_buff *skb,
16161622
struct netlink_callback *cb);

include/uapi/linux/if_bridge.h

+1
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ enum {
757757
MDBE_ATTR_VNI,
758758
MDBE_ATTR_IFINDEX,
759759
MDBE_ATTR_SRC_VNI,
760+
MDBE_ATTR_STATE_MASK,
760761
__MDBE_ATTR_MAX,
761762
};
762763
#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)

net/bridge/br_device.c

+1
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = {
471471
.ndo_fdb_get = br_fdb_get,
472472
.ndo_mdb_add = br_mdb_add,
473473
.ndo_mdb_del = br_mdb_del,
474+
.ndo_mdb_del_bulk = br_mdb_del_bulk,
474475
.ndo_mdb_dump = br_mdb_dump,
475476
.ndo_mdb_get = br_mdb_get,
476477
.ndo_bridge_getlink = br_getlink,

net/bridge/br_mdb.c

+133
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
14121412
return err;
14131413
}
14141414

1415+
/*
 * Filter describing which bridge MDB entries a flush should remove.
 *
 * port_ifindex, vid and rt_protocol are ignored by the flush code when
 * zero. state is only consulted when state_mask is non-zero, in which case
 * an entry matches if (its state & state_mask) equals state.
 */
struct br_mdb_flush_desc {
1416+
/* ifindex of the port (or of the bridge itself for host entries). */
u32 port_ifindex;
1417+
/* VLAN ID, compared against the group address' vid. */
u16 vid;
1418+
/* Routing protocol that installed the port group. */
u8 rt_protocol;
1419+
/* Requested state bits, copied from the request's br_mdb_entry. */
u8 state;
1420+
/* Which state bits take part in the comparison. */
u8 state_mask;
1421+
};
1422+
1423+
/*
 * Netlink policy used by br_mdb_flush_desc_init() to parse the nested
 * MDBA_SET_ENTRY_ATTRS attribute of a bulk deletion request: a routing
 * protocol (minimum RTPROT_STATIC) and a state mask limited to the
 * MDB_PERMANENT bit.
 */
static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
1424+
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
1425+
[MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
1426+
};
1427+
1428+
/*
 * Build a flush descriptor from the attributes of a bulk deletion request.
 *
 * @desc:   descriptor to fill. port_ifindex, vid and state are always
 *          copied from the request's br_mdb_entry; state_mask and
 *          rt_protocol are only written when their attribute is present.
 * @tb:     top-level attributes; tb[MDBA_SET_ENTRY] is dereferenced
 *          unconditionally (presumably guaranteed by the caller - confirm
 *          against the rtnetlink code), tb[MDBA_SET_ENTRY_ATTRS] is optional.
 * @extack: extended ack passed to the nested attribute parser.
 *
 * Returns 0 on success or the error returned by nla_parse_nested().
 */
static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc,
1429+
struct nlattr *tb[],
1430+
struct netlink_ext_ack *extack)
1431+
{
1432+
struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
1433+
struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
1434+
int err;
1435+
1436+
desc->port_ifindex = entry->ifindex;
1437+
desc->vid = entry->vid;
1438+
desc->state = entry->state;
1439+
1440+
/* No nested attributes: the fixed-size entry is the whole filter. */
if (!tb[MDBA_SET_ENTRY_ATTRS])
1441+
return 0;
1442+
1443+
err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
1444+
tb[MDBA_SET_ENTRY_ATTRS],
1445+
br_mdbe_attrs_del_bulk_pol, extack);
1446+
if (err)
1447+
return err;
1448+
1449+
if (mdbe_attrs[MDBE_ATTR_STATE_MASK])
1450+
desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);
1451+
1452+
if (mdbe_attrs[MDBE_ATTR_RTPROT])
1453+
desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
1454+
1455+
return 0;
1456+
}
1457+
1458+
/*
 * Remove the bridge's own (host) membership of group @mp if it matches
 * @desc.
 *
 * The host entry is left alone when a port filter names a device other
 * than the bridge, when any routing protocol filter is set, or when the
 * state filter does not match. For the state comparison, L2 groups are
 * treated as permanent and all other groups as non-permanent.
 */
static void br_mdb_flush_host(struct net_bridge *br,
1459+
struct net_bridge_mdb_entry *mp,
1460+
const struct br_mdb_flush_desc *desc)
1461+
{
1462+
u8 state;
1463+
1464+
if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex)
1465+
return;
1466+
1467+
/*
 * NOTE(review): a protocol filter never matches the host entry here,
 * presumably because host memberships carry no routing protocol - confirm.
 */
if (desc->rt_protocol)
1468+
return;
1469+
1470+
state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0;
1471+
if (desc->state_mask && (state & desc->state_mask) != desc->state)
1472+
return;
1473+
1474+
br_multicast_host_leave(mp, true);
1475+
/* With no ports left, arm the group timer to fire right away. */
if (!mp->ports && netif_running(br->dev))
1476+
mod_timer(&mp->timer, jiffies);
1477+
}
1478+
1479+
/*
 * Delete every port group of @mp that matches @desc.
 *
 * The list is walked through a pointer to the current link (pp). When a
 * port group is skipped, pp is advanced to its next pointer; when it is
 * deleted, pp is left unchanged and the loop re-reads the same link
 * (presumably br_multicast_del_pg() unlinks p so that the link then refers
 * to the following group - confirm against its implementation).
 */
static void br_mdb_flush_pgs(struct net_bridge *br,
1480+
struct net_bridge_mdb_entry *mp,
1481+
const struct br_mdb_flush_desc *desc)
1482+
{
1483+
struct net_bridge_port_group __rcu **pp;
1484+
struct net_bridge_port_group *p;
1485+
1486+
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) {
1487+
u8 state;
1488+
1489+
if (desc->port_ifindex &&
1490+
desc->port_ifindex != p->key.port->dev->ifindex) {
1491+
pp = &p->next;
1492+
continue;
1493+
}
1494+
1495+
if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) {
1496+
pp = &p->next;
1497+
continue;
1498+
}
1499+
1500+
/* Reduce the port group flags to the single permanent state bit. */
state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0;
1501+
if (desc->state_mask &&
1502+
(state & desc->state_mask) != desc->state) {
1503+
pp = &p->next;
1504+
continue;
1505+
}
1506+
1507+
br_multicast_del_pg(mp, p, pp);
1508+
}
1509+
}
1510+
1511+
/*
 * Flush all MDB entries of @br that match @desc.
 *
 * Runs with br->multicast_lock held (bottom halves disabled). Groups whose
 * VLAN ID differs from a non-zero desc->vid are skipped; for the others the
 * host membership and then the port groups are flushed.
 */
static void br_mdb_flush(struct net_bridge *br,
1512+
const struct br_mdb_flush_desc *desc)
1513+
{
1514+
struct net_bridge_mdb_entry *mp;
1515+
1516+
spin_lock_bh(&br->multicast_lock);
1517+
1518+
/* Safe variant is not needed because entries are removed from the list
1519+
* upon group timer expiration or bridge deletion.
1520+
*/
1521+
hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
1522+
if (desc->vid && desc->vid != mp->addr.vid)
1523+
continue;
1524+
1525+
br_mdb_flush_host(br, mp, desc);
1526+
br_mdb_flush_pgs(br, mp, desc);
1527+
}
1528+
1529+
spin_unlock_bh(&br->multicast_lock);
1530+
}
1531+
1532+
/*
 * Bulk-delete MDB entries of a bridge device.
 *
 * @dev:    bridge net device.
 * @tb:     parsed top-level netlink attributes of the request.
 * @extack: extended ack for error reporting.
 *
 * Translates the request into a flush descriptor and removes every
 * matching host and port group entry. Returns 0 on success or the error
 * from br_mdb_flush_desc_init(); nothing is flushed on error.
 */
int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
1533+
struct netlink_ext_ack *extack)
1534+
{
1535+
struct net_bridge *br = netdev_priv(dev);
1536+
/* Zero-initialised so that absent attributes leave their filter unset. */
struct br_mdb_flush_desc desc = {};
1537+
int err;
1538+
1539+
err = br_mdb_flush_desc_init(&desc, tb, extack);
1540+
if (err)
1541+
return err;
1542+
1543+
br_mdb_flush(br, &desc);
1544+
1545+
return 0;
1546+
}
1547+
14151548
static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
14161549
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
14171550
sizeof(struct in_addr),

0 commit comments

Comments
 (0)