Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/selftests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
strategy:
matrix:
KERNEL_VERSION:
- "6.12.0-0.rc2.24.fc42"
- "6.10.1-200.fc40"
- "6.6.14-200.fc39"
- "6.1.9-200.fc37"
Expand Down Expand Up @@ -42,7 +43,7 @@ jobs:
- name: Prepare packages
run: |
sudo apt-get update
sudo apt-get install zstd binutils-dev elfutils libpcap-dev libelf-dev gcc-multilib pkg-config wireshark tshark bpfcc-tools python3 python3-pip python3-setuptools qemu-kvm rpm2cpio libdw-dev libdwarf-dev libcap-ng-dev
sudo apt-get install zstd binutils-dev elfutils libpcap-dev libelf-dev gcc-multilib pkg-config wireshark tshark bpfcc-tools python3 python3-pip python3-setuptools qemu-kvm rpm2cpio libdw-dev libdwarf-dev libcap-ng-dev socat
- name: Prepare Clang
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
Expand Down
4 changes: 4 additions & 0 deletions headers/linux/hlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

struct list_head;

/*
 * Singly-linked node: its only member is a pointer to the next rhash_head.
 * NOTE(review): presumably a minimal stand-in for the kernel's rhashtable
 * node so that structs embedding it keep the expected layout - confirm.
 */
struct rhash_head {
struct rhash_head *next;
};

#define HLIST_POISON_POINTER_DELTA 0
#define HLIST_POISON1 ((void *) 0x100 + HLIST_POISON_POINTER_DELTA)
#define HLIST_POISON2 ((void *) 0x200 + HLIST_POISON_POINTER_DELTA)
Expand Down
114 changes: 114 additions & 0 deletions headers/linux/netfilter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#ifndef _LINUX_NETFILTER_H
#define _LINUX_NETFILTER_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <xdp/parsing_helpers.h>

#include "hlist.h"

/* Pair of 16-bit big-endian ports: source first, destination second. */
struct flow_ports {
	__be16 source;
	__be16 dest;
};

/*
 * Direction index. IP_CT_DIR_MAX is not a direction itself; it equals the
 * number of directions declared before it.
 */
enum ip_conntrack_dir {
	IP_CT_DIR_ORIGINAL = 0,
	IP_CT_DIR_REPLY    = 1,
	IP_CT_DIR_MAX      = 2
};

/*
 * Flow-offload direction constants. Each one is defined as the matching
 * ip_conntrack_dir value, so the two enums can be used interchangeably as
 * indices.
 */
enum flow_offload_tuple_dir {
	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
	FLOW_OFFLOAD_DIR_REPLY    = IP_CT_DIR_REPLY,
	FLOW_OFFLOAD_DIR_MAX      = IP_CT_DIR_MAX,
};

/* Kind of offloaded flow; 0 means unspecified. */
enum flow_offload_type {
	NF_FLOW_OFFLOAD_UNSPEC = 0,
	NF_FLOW_OFFLOAD_ROUTE  = 1,
};

/*
 * Flow flag identifiers, numbered consecutively from 0.
 * NOTE(review): presumably these are bit positions inside
 * flow_offload.flags rather than bit masks - confirm against the users.
 */
enum nf_flow_flags {
	NF_FLOW_SNAT             = 0,
	NF_FLOW_DNAT             = 1,
	NF_FLOW_TEARDOWN         = 2,
	NF_FLOW_HW               = 3,
	NF_FLOW_HW_DYING         = 4,
	NF_FLOW_HW_DEAD          = 5,
	NF_FLOW_HW_PENDING       = 6,
	NF_FLOW_HW_BIDIRECTIONAL = 7,
	NF_FLOW_HW_ESTABLISHED   = 8,
};

/*
 * Transmit path selector for an offloaded flow. The values run from 0 to 4,
 * which fits the 3-bit xmit_type bitfield of struct flow_offload_tuple.
 */
enum flow_offload_xmit_type {
	FLOW_OFFLOAD_XMIT_UNSPEC = 0,
	FLOW_OFFLOAD_XMIT_NEIGH  = 1,
	FLOW_OFFLOAD_XMIT_XFRM   = 2,
	FLOW_OFFLOAD_XMIT_DIRECT = 3,
	FLOW_OFFLOAD_XMIT_TC     = 4,
};

/* Number of entries in flow_offload_tuple.encap[]. */
#define NF_FLOW_TABLE_ENCAP_MAX 2
/*
 * Description of one direction of an offloaded flow. The lookup-key members
 * come first, then the empty __hash marker, then the forwarding data.
 * NOTE(review): presumably mirrors the kernel's struct flow_offload_tuple,
 * in which case member order and widths must not be changed - confirm.
 */
struct flow_offload_tuple {
/* Source address; the IPv4 and IPv6 forms share storage. */
union {
struct in_addr src_v4;
struct in6_addr src_v6;
};
/* Destination address; the IPv4 and IPv6 forms share storage. */
union {
struct in_addr dst_v4;
struct in6_addr dst_v6;
};
/* Source and destination ports, stored big-endian (__be16). */
struct {
__be16 src_port;
__be16 dst_port;
};

/* NOTE(review): presumably the ingress interface index - confirm. */
int iifidx;

/* NOTE(review): presumably layer-3 / layer-4 protocol identifiers - confirm. */
__u8 l3proto;
__u8 l4proto;
/* NF_FLOW_TABLE_ENCAP_MAX (id, big-endian proto) encapsulation entries. */
struct {
__u16 id;
__be16 proto;
} encap[NF_FLOW_TABLE_ENCAP_MAX];

/* All members above are keys for lookups, see flow_offload_hash(). */
/* Empty member that only marks where the key members end. */
struct { } __hash;

/*
 * Bitfields declared on a __u8 base type. The widths are 2 + 3 + 2 + 2 = 9
 * bits, i.e. one more than a single __u8 holds.
 * NOTE(review): presumably intentional to match the kernel layout - confirm
 * before touching any width.
 */
__u8 dir:2,
xmit_type:3,
encap_num:2,
in_vlan_ingress:2;
__u16 mtu;
/*
 * Forwarding data; the three layouts below share storage.
 * NOTE(review): presumably xmit_type selects which layout is valid - confirm.
 */
union {
/* Anonymous layout: destination cache pointer plus a 32-bit cookie. */
struct {
struct dst_entry *dst_cache;
__u32 dst_cookie;
};
/* "out" layout: two interface indices and source/destination MAC addresses. */
struct {
__u32 ifidx;
__u32 hw_ifidx;
__u8 h_source[ETH_ALEN];
__u8 h_dest[ETH_ALEN];
} out;
/* "tc" layout: a single interface index. */
struct {
__u32 iifidx;
} tc;
};
};

/*
 * A flow_offload_tuple preceded by an rhash_head link node.
 * NOTE(review): presumably the element type stored in the flowtable's hash
 * table - confirm.
 */
struct flow_offload_tuple_rhash {
struct rhash_head node;
struct flow_offload_tuple tuple;
};

/*
 * One offloaded flow: a tuple hash entry per direction plus flow-wide state.
 */
struct flow_offload {
/* FLOW_OFFLOAD_DIR_MAX entries, indexed by enum flow_offload_tuple_dir. */
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
/* Pointer to struct nf_conn, which is not defined in this header. */
struct nf_conn *ct;
/* NOTE(review): presumably a bitmap indexed by enum nf_flow_flags - confirm. */
unsigned long flags;
/* NOTE(review): presumably holds an enum flow_offload_type value - confirm. */
__u16 type;
__u32 timeout;
};

#endif /* _LINUX_NETFILTER_H */
2 changes: 1 addition & 1 deletion lib/testing/test_runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ ALL_TESTS=""
VERBOSE_TESTS=${V:-0}
NUM_NS=2

NEEDED_TOOLS="capinfos ethtool ip ping sed tc tcpdump timeout nc tshark"
NEEDED_TOOLS="capinfos ethtool ip ping sed tc tcpdump timeout nc tshark nft socat"

if [ -f "$TEST_CONFIG" ]; then
source "$TEST_CONFIG"
Expand Down
2 changes: 1 addition & 1 deletion xdp-forward/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0

XDP_TARGETS := xdp_forward.bpf
XDP_TARGETS := xdp_forward.bpf xdp_flowtable.bpf xdp_flowtable_sample.bpf
BPF_SKEL_TARGETS := $(XDP_TARGETS)

XDP_OBJ_INSTALL :=
Expand Down
61 changes: 52 additions & 9 deletions xdp-forward/README.org
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ xdp-forward is an XDP forwarding plane, which will accelerate packet forwarding
using XDP. To use it, simply load it on the set of interfaces to accelerate
forwarding between. The userspace component of xdp-forward will then configure
and load XDP programs on those interfaces, and forward packets between them
using XDP_REDIRECT, using the kernel routing table to determine the destination
if each packet.
using XDP_REDIRECT, using the kernel routing table or netfilter flowtable to
determine the destination for each packet.

Any packets that xdp-forward does not know how to forward will be passed up to
the networking stack and handled by the kernel like normal. Depending on the
Expand Down Expand Up @@ -56,7 +56,11 @@ Specifies which forwarding mode =xdp-forward= should operate in. Depending on
the mode selected, =xdp-forward= will perform forwarding in different ways,
which can lead to different behaviour, including which subset of kernel
configuration (such as firewall rules) is respected during forwarding. See the
section *OPERATING MODES* below for a full description of each mode.
section *FORWARDING MODES* below for a full description of each mode.

** -F, --fib-mode <mode>
Specifies how =xdp-forward= performs routing table lookups in the Linux kernel.
See the section *FIB MODES* below for a full description of each mode.

** -m, --mode <mode>
Specifies which mode the XDP program should be loaded in. The valid values
Expand Down Expand Up @@ -98,12 +102,12 @@ Enable debug logging. Specify twice for even more verbosity.
** -h, --help
Display a summary of the available options

* OPERATING MODES
The =xdp-forward= utility supports the following operating modes (selected by
* FORWARDING MODES
The =xdp-forward= utility supports the following forwarding modes (selected by
the =--fwd-mode= parameter to =xdp-forward load=).

** fib-full (default)
In the =fib-full= operating mode, =xdp-forward= will perform a full lookup in
** fib (default)
In the =fib= forwarding mode, =xdp-forward= will perform a lookup in
the kernel routing table (or FIB) for each packet, and forward packets between
the configured interfaces based on the result of the lookup. Any packet where
the lookup fails will be passed up to the stack. This includes packets that
Expand All @@ -115,12 +119,51 @@ Note that no checks other than the FIB lookup are performed; in particular, this
completely bypasses the netfilter subsystem, so firewall rules will not be
checked before forwarding.

** fib-direct
The =fib-direct= mode functions like =fib-full=, except it passes the
** flowtable
The =flowtable= forwarding mode offloads the netfilter software flowtable logic
to the XDP layer if the hardware flowtable is not available.
At the moment =xdp-forward= is only able to offload TCP and UDP netfilter
flowtable entries to XDP. The user is expected to configure the flowtable
separately.

* FIB MODES
The =xdp-forward= utility supports the following FIB modes (selected by
the =--fib-mode= parameter to =xdp-forward load=).

** full (default)
In the =full= FIB mode, =xdp-forward= will perform a full lookup in
the kernel routing table (or FIB) for each packet, and forward packets between
the configured interfaces based on the result of the lookup. In particular,
it will apply any policy routing rules configured by the user.

** direct
The =direct= mode functions like =full=, except it passes the
=BPF_FIB_LOOKUP_DIRECT= flag to the FIB lookup routine. This means that any
policy routing rules configured will be skipped during the lookup, which can
improve performance (but won't obey the policy of those rules, obviously).

* EXAMPLES

In order to enable flowtable offloading for TCP and UDP traffic between NICs
n0 and n1, issue the following commands:

#+begin_src sh
#nft -f /dev/stdin <<EOF
table inet filter {
flowtable ft {
hook ingress priority filter
devices = { n0, n1 }
}
chain forward {
type filter hook forward priority filter
meta l4proto { tcp, udp } flow add @ft
}
}
EOF

#xdp-forward load -f flowtable n0 n1
#+end_src

* SEE ALSO
=libxdp(3)= for details on the XDP loading semantics and kernel compatibility
requirements.
Expand Down
92 changes: 86 additions & 6 deletions xdp-forward/tests/test-xdp-forward.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Paths to the tools under test; both can be overridden from the environment.
XDP_LOADER=${XDP_LOADER:-./xdp-loader}
XDP_FORWARD=${XDP_FORWARD:-./xdp-forward}

# Space-separated names of the test functions in this suite. The earlier
# assignment without test_flowtable was dropped: it was overwritten
# immediately and therefore had no effect.
ALL_TESTS="test_ping test_load test_fwd_full test_fwd_direct test_flowtable"

test_ping()
{
Expand All @@ -27,7 +26,7 @@ test_fwd_full()
# veth NAPI GRO support added this symbol; forwarding won't work without it
skip_if_missing_kernel_symbol veth_set_features

check_run $XDP_FORWARD load -f fib-full ${NS_NAMES[@]}
check_run $XDP_FORWARD load -f fib -F full ${NS_NAMES[@]}
for ip in "${ALL_INSIDE_IP4[@]}"; do
check_run ns_exec ping -c 1 -W 2 $ip
done
Expand All @@ -42,7 +41,7 @@ test_fwd_direct()
# veth NAPI GRO support added this symbol; forwarding won't work without it
skip_if_missing_kernel_symbol veth_set_features

check_run $XDP_FORWARD load -f fib-direct ${NS_NAMES[@]}
check_run $XDP_FORWARD load -f fib -F direct ${NS_NAMES[@]}
for ip in "${ALL_INSIDE_IP4[@]}"; do
check_run ns_exec ping -c 1 -W 2 $ip
done
Expand All @@ -52,8 +51,89 @@ test_fwd_direct()
check_run $XDP_FORWARD unload ${NS_NAMES[@]}
}

# Exercise the flowtable forwarding mode: install DNAT/masquerade rules and a
# flowtable with nft, load xdp-forward with "-f flowtable", then send a
# random payload over TCP (IPv4 first, then IPv6) from ${NS_NAMES[0]} to
# port 12345 on the outside address while a socat listener waits on port 10000.
test_flowtable()
{
local INPUT_FILE="${STATEDIR}/in_$$_$RANDOM"

# veth NAPI GRO support added this symbol; forwarding won't work without it
skip_if_missing_kernel_symbol veth_set_features

# disable {tx,rx} checksum offload since it is not currently supported
# by XDP_REDIRECT; done on veth0 inside each namespace and on the
# same-named device in the current namespace
for n in ${NS_NAMES[@]}; do
ip netns exec $n ethtool -K veth0 tx-checksumming off rx-checksumming off
ethtool -K $n tx-checksumming off rx-checksumming off
done

# create 256 KiB (32 blocks of 8192 bytes) of random data to send via tcp
dd if=/dev/urandom of="${INPUT_FILE}" bs=8192 count=32 status=none

# create flowtable configuration in the main namespace
check_run nft -f /dev/stdin <<EOF
table inet nat {
# enable DNAT to server <ip:port> in pre-routing chain
chain prerouting {
type nat hook prerouting priority filter; policy accept;
iifname == "${NS_NAMES[0]}" meta nfproto ipv4 tcp dport 12345 dnat ip to ${ALL_INSIDE_IP4[-1]}:10000
iifname == "${NS_NAMES[0]}" meta nfproto ipv6 tcp dport 12345 dnat ip6 to [${ALL_INSIDE_IP6[-1]}]:10000
}
# enable SNAT of the client ip via masquerading in post-routing chain
chain postrouting {
type nat hook postrouting priority filter; policy accept;
oifname "${NS_NAMES[-1]}" masquerade
}
}
table inet filter {
flowtable ft {
hook ingress priority filter
devices = { ${NS_NAMES[0]}, ${NS_NAMES[-1]} }
}
chain forward {
type filter hook forward priority filter
meta l4proto { tcp } flow add @ft
}
}
EOF

# check if bpf flowtable lookup is available
# NOTE(review): this runs after the flowtable has been created; presumably
# the symbol only shows up once the flowtable code has been loaded by the
# nft configuration above - confirm before moving this check earlier.
skip_if_missing_kernel_symbol bpf_xdp_flow_lookup

# Inside namespace ${NS_NAMES[-1]} (not the main one): drop all input
# except TCP port 10000 coming from the outside address to the last inside
# address. Presumably the socat listener below runs in that namespace, so
# the transfer only succeeds if the DNAT/SNAT configured in the main
# namespace was applied - confirm.
check_run ip netns exec ${NS_NAMES[-1]} nft -f /dev/stdin <<EOF
table inet filter {
chain input {
type filter hook input priority 0; policy drop
ip saddr $OUTSIDE_IP4 ip daddr ${ALL_INSIDE_IP4[-1]} tcp dport 10000 accept
ip6 saddr $OUTSIDE_IP6 ip6 daddr ${ALL_INSIDE_IP6[-1]} tcp dport 10000 accept
}
}
EOF
# give nft two seconds to apply the configuration
sleep 2

# load xdp-forward in flowtable mode on every test interface
check_run $XDP_FORWARD load -f flowtable ${NS_NAMES[@]}

# IPv4 transfer: start a listener on port 10000, send the payload from
# ${NS_NAMES[0]} to the outside address on port 12345, stop the listener
PID=$(start_background_ns_devnull "socat -4 TCP-LISTEN:10000,reuseaddr,fork -")
check_run ip netns exec ${NS_NAMES[0]} socat ${INPUT_FILE} TCP4:${OUTSIDE_IP4}:12345
stop_background $PID

# IPv6 transfer: same sequence over IPv6
PID=$(start_background_ns_devnull "socat -6 TCP-LISTEN:10000,reuseaddr,fork -")
check_run ip netns exec ${NS_NAMES[0]} socat ${INPUT_FILE} TCP6:[${OUTSIDE_IP6}]:12345
stop_background $PID
}

# Undo what the tests in this file may have left behind: detach the XDP
# programs, switch {tx,rx} checksum offload back on and flush the nft
# rulesets. All output is discarded.
#
# The unload commands used to appear twice (once before the ethtool loop and
# once after it); the second pass was redundant, so each now runs once, in
# the position of its first occurrence.
cleanup_tests()
{
    {
        $XDP_FORWARD unload ${NS_NAMES[@]}
        $XDP_LOADER unload $NS --all
    } >/dev/null 2>&1

    # enable {tx,rx} checksum offload
    for n in ${NS_NAMES[@]}; do
        ip netns exec $n ethtool -K veth0 tx-checksumming on rx-checksumming on
        ethtool -K $n tx-checksumming on rx-checksumming on
    done >/dev/null 2>&1

    # remove the nft configuration from the last namespace and from the
    # current one
    {
        check_run ip netns exec ${NS_NAMES[-1]} nft flush ruleset
        check_run nft flush ruleset
    } >/dev/null 2>&1
}
Loading