From 03ad047b700c2dcb7128888d946a092e2353b497 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Jul 2024 17:03:01 +0200 Subject: [PATCH 1/5] xdp-forward: Introduce xdp-fwd-flowtable bpf sample Introduce xdp-fwd-flowtable sample in order to perform XDP_REDIRECT between net_devices inserted in a netfilter flowtable. xdp-fwd-flowtable relies on bpf_xdp_flow_lookup kfunc in order to perform the lookup of a given flowtable entry based on a fib tuple of incoming traffic. At the moment we are able to offload just TCP or UDP netfilter flowtable entries to the xdp layer. The user is supposed to configure the flowtable separately. Signed-off-by: Lorenzo Bianconi --- headers/linux/hlist.h | 4 + headers/linux/netfilter.h | 114 ++++++ xdp-forward/Makefile | 2 +- xdp-forward/xdp_flowtable.bpf.c | 609 ++++++++++++++++++++++++++++++++ 4 files changed, 728 insertions(+), 1 deletion(-) create mode 100644 headers/linux/netfilter.h create mode 100644 xdp-forward/xdp_flowtable.bpf.c diff --git a/headers/linux/hlist.h b/headers/linux/hlist.h index a451b49c..ae35508b 100644 --- a/headers/linux/hlist.h +++ b/headers/linux/hlist.h @@ -5,6 +5,10 @@ struct list_head; +struct rhash_head { + struct rhash_head *next; +}; + #define HLIST_POISON_POINTER_DELTA 0 #define HLIST_POISON1 ((void *) 0x100 + HLIST_POISON_POINTER_DELTA) #define HLIST_POISON2 ((void *) 0x200 + HLIST_POISON_POINTER_DELTA) diff --git a/headers/linux/netfilter.h b/headers/linux/netfilter.h new file mode 100644 index 00000000..604d945e --- /dev/null +++ b/headers/linux/netfilter.h @@ -0,0 +1,114 @@ +#ifndef _LINUX_NETFILTER_H +#define _LINUX_NETFILTER_H + +#include +#include +#include +#include + +#include "hlist.h" + +struct flow_ports { + __be16 source, dest; +}; + +enum ip_conntrack_dir { + IP_CT_DIR_ORIGINAL, + IP_CT_DIR_REPLY, + IP_CT_DIR_MAX +}; + +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, + FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX, 
+}; + +enum flow_offload_type { + NF_FLOW_OFFLOAD_UNSPEC, + NF_FLOW_OFFLOAD_ROUTE, +}; + +enum nf_flow_flags { + NF_FLOW_SNAT, + NF_FLOW_DNAT, + NF_FLOW_TEARDOWN, + NF_FLOW_HW, + NF_FLOW_HW_DYING, + NF_FLOW_HW_DEAD, + NF_FLOW_HW_PENDING, + NF_FLOW_HW_BIDIRECTIONAL, + NF_FLOW_HW_ESTABLISHED, +}; + +enum flow_offload_xmit_type { + FLOW_OFFLOAD_XMIT_UNSPEC, + FLOW_OFFLOAD_XMIT_NEIGH, + FLOW_OFFLOAD_XMIT_XFRM, + FLOW_OFFLOAD_XMIT_DIRECT, + FLOW_OFFLOAD_XMIT_TC, +}; + +#define NF_FLOW_TABLE_ENCAP_MAX 2 +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + int iifidx; + + __u8 l3proto; + __u8 l4proto; + struct { + __u16 id; + __be16 proto; + } encap[NF_FLOW_TABLE_ENCAP_MAX]; + + /* All members above are keys for lookups, see flow_offload_hash(). */ + struct { } __hash; + + __u8 dir:2, + xmit_type:3, + encap_num:2, + in_vlan_ingress:2; + __u16 mtu; + union { + struct { + struct dst_entry *dst_cache; + __u32 dst_cookie; + }; + struct { + __u32 ifidx; + __u32 hw_ifidx; + __u8 h_source[ETH_ALEN]; + __u8 h_dest[ETH_ALEN]; + } out; + struct { + __u32 iifidx; + } tc; + }; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + struct nf_conn *ct; + unsigned long flags; + __u16 type; + __u32 timeout; +}; + +#endif /* _LINUX_NETFILTER_H */ diff --git a/xdp-forward/Makefile b/xdp-forward/Makefile index 51201772..17e8374e 100644 --- a/xdp-forward/Makefile +++ b/xdp-forward/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -XDP_TARGETS := xdp_forward.bpf +XDP_TARGETS := xdp_forward.bpf xdp_flowtable.bpf BPF_SKEL_TARGETS := $(XDP_TARGETS) XDP_OBJ_INSTALL := diff --git a/xdp-forward/xdp_flowtable.bpf.c b/xdp-forward/xdp_flowtable.bpf.c new file mode 100644 index 
00000000..32f627d7 --- /dev/null +++ b/xdp-forward/xdp_flowtable.bpf.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original xdp_fwd sample Copyright (c) 2017-18 David Ahern + */ + +#include +#include +#include +#include + +#define AF_INET 2 +#define AF_INET6 10 + +#define IPV6_FLOWINFO_MASK bpf_htons(0x0FFFFFFF) + +#define IP_MF 0x2000 /* "More Fragments" */ +#define IP_OFFSET 0x1fff /* "Fragment Offset" */ +#define CSUM_MANGLED_0 ((__sum16)0xffff) + +#define BIT(x) (1 << (x)) + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 64); +} xdp_tx_ports SEC(".maps"); + +struct bpf_flowtable_opts { + __s32 error; +}; + +struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *, + struct bpf_flowtable_opts *, __u32) __ksym; + +/* from include/net/ip.h */ +static __always_inline int ip_decrease_ttl(struct iphdr *iph) +{ + __u32 check = (__u32)iph->check; + + check += (__u32)bpf_htons(0x0100); + iph->check = (__sum16)(check + (check >= 0xFFFF)); + return --iph->ttl; +} + +static __always_inline __u32 csum_add(__u32 csum, __u32 addend) +{ + __u32 res = csum + addend; + + return res + (res < addend); +} + +static __always_inline __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return ~csum; +} + +static __always_inline __u16 csum_replace4(__u32 csum, __u32 from, __u32 to) +{ + __u32 tmp = csum_add(~csum, ~from); + + return csum_fold(csum_add(tmp, to)); +} + +static __always_inline __u16 csum_replace16(__u32 csum, __u32 *from, __u32 *to) +{ + __u32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + csum = bpf_csum_diff(0, 0, diff, sizeof(diff), ~csum); + return csum_fold(csum); +} + +static __always_inline int +xdp_flowtable_check_tcp_state(void *ports, void *data_end, __u8 proto) +{ + if (proto == IPPROTO_TCP) { + struct tcphdr *tcph 
= ports; + + if (tcph + 1 > data_end) + return -1; + + if (tcph->fin || tcph->rst) + return -1; + } + + return 0; +} + +static __always_inline void +xdp_flowtable_update_port_csum(struct flow_ports *ports, void *data_end, + __u8 proto, __be16 port, __be16 nat_port) +{ + switch (proto) { + case IPPROTO_TCP: { + struct tcphdr *tcph = (struct tcphdr *)ports; + + if (tcph + 1 > data_end) + break; + + tcph->check = csum_replace4((__u32)tcph->check, (__u32)port, + (__u32)nat_port); + break; + } + case IPPROTO_UDP: { + struct udphdr *udph = (struct udphdr *)ports; + + if (udph + 1 > data_end) + break; + + if (!udph->check) + break; + + udph->check = csum_replace4((__u32)udph->check, (__u32)port, + (__u32)nat_port); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + break; + } + default: + break; + } +} + +static __always_inline void +xdp_flowtable_snat_port(const struct flow_offload *flow, + struct flow_ports *ports, void *data_end, + __u8 proto, enum flow_offload_tuple_dir dir) +{ + __be16 port, nat_port; + + if (ports + 1 > data_end) + return; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + port = ports->source; + /* For original direction (FLOW_OFFLOAD_DIR_ORIGINAL): + * - tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port contains + * the source port used for the traffic transmitted by the + * host. + * - tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port contains + * the destination port used for the traffic transmitted by + * the host. + */ + bpf_core_read(&nat_port, bpf_core_type_size(nat_port), + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); + ports->source = nat_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + /* For reply direction (FLOW_OFFLOAD_DIR_REPLY): + * - tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port + * contains source port used for the traffic received by the + * host. + * - tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port + * contains the destination port used for the traffic + * received by the host. 
+ */ + port = ports->dest; + bpf_core_read(&nat_port, bpf_core_type_size(nat_port), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port); + ports->dest = nat_port; + break; + default: + return; + } + + xdp_flowtable_update_port_csum(ports, data_end, proto, port, nat_port); +} + +static __always_inline void +xdp_flowtable_dnat_port(const struct flow_offload *flow, + struct flow_ports *ports, void *data_end, __u8 proto, + enum flow_offload_tuple_dir dir) +{ + __be16 port, nat_port; + + if (ports + 1 > data_end) + return; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + /* For original direction (FLOW_OFFLOAD_DIR_ORIGINAL): + * - tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port contains + * the source port used for the traffic transmitted by the + * host. + * - tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port contains + * the destination port used for the traffic transmitted by + * the host. + */ + port = ports->dest; + bpf_core_read(&nat_port, bpf_core_type_size(nat_port), + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); + ports->dest = nat_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + /* For reply direction (FLOW_OFFLOAD_DIR_REPLY): + * - tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port + * contains the source port used for the traffic received by + * the host. + * - tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port + * contains destination port used for the traffic received by + * the host. 
+ */ + port = ports->source; + bpf_core_read(&nat_port, bpf_core_type_size(nat_port), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port); + ports->source = nat_port; + break; + default: + return; + } + + xdp_flowtable_update_port_csum(ports, data_end, proto, port, nat_port); +} + +static __always_inline void +xdp_flowtable_update_ipv4_csum(struct iphdr *iph, void *data_end, + __be32 addr, __be32 nat_addr) +{ + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr *tcph = (struct tcphdr *)(iph + 1); + + if (tcph + 1 > data_end) + break; + + tcph->check = csum_replace4((__u32)tcph->check, addr, + nat_addr); + break; + } + case IPPROTO_UDP: { + struct udphdr *udph = (struct udphdr *)(iph + 1); + + if (udph + 1 > data_end) + break; + + if (!udph->check) + break; + + udph->check = csum_replace4((__u32)udph->check, addr, + nat_addr); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + break; + } + default: + break; + } +} + +static __always_inline void +xdp_flowtable_snat_ip(const struct flow_offload *flow, struct iphdr *iph, + void *data_end, enum flow_offload_tuple_dir dir) +{ + __be32 addr, nat_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->saddr; + bpf_core_read(&nat_addr, bpf_core_type_size(nat_addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr); + iph->saddr = nat_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->daddr; + bpf_core_read(&nat_addr, bpf_core_type_size(nat_addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr); + iph->daddr = nat_addr; + break; + default: + return; + } + iph->check = csum_replace4((__u32)iph->check, addr, nat_addr); + + xdp_flowtable_update_ipv4_csum(iph, data_end, addr, nat_addr); +} + +static __always_inline void +xdp_flowtable_get_dnat_ip(__be32 *addr, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir) +{ + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + bpf_core_read(addr, sizeof(*addr), + 
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr); + break; + case FLOW_OFFLOAD_DIR_REPLY: + bpf_core_read(addr, sizeof(*addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr); + break; + default: + break; + } +} + +static __always_inline void +xdp_flowtable_dnat_ip(const struct flow_offload *flow, struct iphdr *iph, + void *data_end, enum flow_offload_tuple_dir dir) +{ + __be32 addr, nat_addr; + + xdp_flowtable_get_dnat_ip(&nat_addr, flow, dir); + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->daddr; + iph->daddr = nat_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->saddr; + iph->saddr = nat_addr; + break; + default: + return; + } + iph->check = csum_replace4((__u32)iph->check, addr, nat_addr); + + xdp_flowtable_update_ipv4_csum(iph, data_end, addr, nat_addr); +} + +static __always_inline void +xdp_flowtable_update_ipv6_csum(struct ipv6hdr *ip6h, void *data_end, + struct in6_addr *addr, + struct in6_addr *nat_addr) +{ + switch (ip6h->nexthdr) { + case IPPROTO_TCP: { + struct tcphdr *tcph = (struct tcphdr *)(ip6h + 1); + + if (tcph + 1 > data_end) + break; + + tcph->check = csum_replace16((__u32)tcph->check, + addr->in6_u.u6_addr32, + nat_addr->in6_u.u6_addr32); + break; + } + case IPPROTO_UDP: { + struct udphdr *udph = (struct udphdr *)(ip6h + 1); + + if (udph + 1 > data_end) + break; + + if (!udph->check) + break; + + udph->check = csum_replace16((__u32)udph->check, + addr->in6_u.u6_addr32, + nat_addr->in6_u.u6_addr32); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + break; + } + default: + break; + } +} + +static __always_inline void +xdp_flowtable_snat_ipv6(const struct flow_offload *flow, struct ipv6hdr *ip6h, + void *data_end, enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, nat_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->saddr; + bpf_core_read(&nat_addr, bpf_core_type_size(nat_addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6); + ip6h->saddr 
= nat_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->daddr; + bpf_core_read(&nat_addr, bpf_core_type_size(nat_addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6); + ip6h->daddr = nat_addr; + break; + default: + return; + } + + xdp_flowtable_update_ipv6_csum(ip6h, data_end, &addr, &nat_addr); +} + +static __always_inline void +xdp_flowtable_get_dnat_ipv6(struct in6_addr *addr, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir) +{ + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + bpf_core_read(addr, sizeof(*addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6); + break; + case FLOW_OFFLOAD_DIR_REPLY: + bpf_core_read(addr, sizeof(*addr), + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6); + break; + default: + break; + } +} + +static __always_inline void +xdp_flowtable_dnat_ipv6(const struct flow_offload *flow, struct ipv6hdr *ip6h, + void *data_end, enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, nat_addr; + + xdp_flowtable_get_dnat_ipv6(&nat_addr, flow, dir); + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->daddr; + ip6h->daddr = nat_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->saddr; + ip6h->saddr = nat_addr; + break; + default: + return; + } + + xdp_flowtable_update_ipv6_csum(ip6h, data_end, &addr, &nat_addr); +} + +static __always_inline void +xdp_flowtable_forward_ip(const struct flow_offload *flow, void *data, + void *data_end, struct flow_ports *ports, + enum flow_offload_tuple_dir dir, + unsigned long flags) +{ + struct iphdr *iph = data + sizeof(struct ethhdr); + + if (iph + 1 > data_end) + return; + + if (flags & BIT(NF_FLOW_SNAT)) { + xdp_flowtable_snat_port(flow, ports, data_end, iph->protocol, + dir); + xdp_flowtable_snat_ip(flow, iph, data_end, dir); + } + if (flags & BIT(NF_FLOW_DNAT)) { + xdp_flowtable_dnat_port(flow, ports, data_end, iph->protocol, + dir); + xdp_flowtable_dnat_ip(flow, iph, data_end, dir); + } + + ip_decrease_ttl(iph); 
+} + +static __always_inline void +xdp_flowtable_forward_ipv6(const struct flow_offload *flow, void *data, + void *data_end, struct flow_ports *ports, + enum flow_offload_tuple_dir dir, + unsigned long flags) +{ + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + + if (ip6h + 1 > data_end) + return; + + if (flags & BIT(NF_FLOW_SNAT)) { + xdp_flowtable_snat_port(flow, ports, data_end, ip6h->nexthdr, + dir); + xdp_flowtable_snat_ipv6(flow, ip6h, data_end, dir); + } + if (flags & BIT(NF_FLOW_DNAT)) { + xdp_flowtable_dnat_port(flow, ports, data_end, ip6h->nexthdr, + dir); + xdp_flowtable_dnat_ipv6(flow, ip6h, data_end, dir); + } + + ip6h->hop_limit--; +} + +SEC("xdp") +int xdp_fwd_flowtable(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + struct flow_offload_tuple_rhash *tuplehash; + struct bpf_fib_lookup tuple = { + .ifindex = ctx->ingress_ifindex, + }; + void *data = (void *)(long)ctx->data; + struct bpf_flowtable_opts opts = {}; + enum flow_offload_tuple_dir dir; + struct ethhdr *eth = data; + struct flow_offload *flow; + struct flow_ports *ports; + unsigned long flags; + + if (eth + 1 > data_end) + return XDP_PASS; + + switch (eth->h_proto) { + case bpf_htons(ETH_P_IP): { + struct iphdr *iph = data + sizeof(*eth); + + ports = (struct flow_ports *)(iph + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + /* ip fragmented traffic */ + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + /* ip options */ + if (iph->ihl * 4 != sizeof(*iph)) + return XDP_PASS; + + if (iph->ttl <= 1) + return XDP_PASS; + + if (xdp_flowtable_check_tcp_state(ports, data_end, + iph->protocol) < 0) + return XDP_PASS; + + tuple.family = AF_INET; + tuple.tos = iph->tos; + tuple.l4_protocol = iph->protocol; + tuple.tot_len = bpf_ntohs(iph->tot_len); + tuple.ipv4_src = iph->saddr; + tuple.ipv4_dst = iph->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + case bpf_htons(ETH_P_IPV6): { + struct in6_addr *src = 
(struct in6_addr *)tuple.ipv6_src; + struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + ports = (struct flow_ports *)(ip6h + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + if (ip6h->hop_limit <= 1) + return XDP_PASS; + + if (xdp_flowtable_check_tcp_state(ports, data_end, + ip6h->nexthdr) < 0) + return XDP_PASS; + + tuple.family = AF_INET6; + tuple.l4_protocol = ip6h->nexthdr; + tuple.tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + default: + return XDP_PASS; + } + + tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts)); + if (!tuplehash) + return XDP_PASS; + + flow = container_of(tuplehash, struct flow_offload, tuplehash); + if (bpf_core_read(&flags, sizeof(flags), &flow->flags)) + return XDP_PASS; + + if (tuplehash->tuple.xmit_type != FLOW_OFFLOAD_XMIT_NEIGH) + return XDP_PASS; + + dir = tuplehash->tuple.dir; + if (dir >= FLOW_OFFLOAD_DIR_MAX) + return XDP_PASS; + + /* update the destination address in case of dnatting before + * performing the route lookup + */ + if (tuple.family == AF_INET6) { + struct in6_addr *dst_addr = (struct in6_addr *)&tuple.ipv6_dst; + + xdp_flowtable_get_dnat_ipv6(dst_addr, flow, dir); + } else { + xdp_flowtable_get_dnat_ip(&tuple.ipv4_dst, flow, dir); + } + + if (bpf_fib_lookup(ctx, &tuple, sizeof(tuple), 0) != + BPF_FIB_LKUP_RET_SUCCESS) + return XDP_PASS; + + /* Verify egress index has been configured as TX-port */ + if (!bpf_map_lookup_elem(&xdp_tx_ports, &tuple.ifindex)) + return XDP_PASS; + + if (tuple.family == AF_INET6) + xdp_flowtable_forward_ipv6(flow, data, data_end, ports, dir, + flags); + else + xdp_flowtable_forward_ip(flow, data, data_end, ports, dir, + flags); + + __builtin_memcpy(eth->h_dest, tuple.dmac, ETH_ALEN); + __builtin_memcpy(eth->h_source, tuple.smac, ETH_ALEN); + + return bpf_redirect_map(&xdp_tx_ports, tuple.ifindex, 0); +} 
+ +char _license[] SEC("license") = "GPL"; From 30e9f7cff1ffbb462f03aba3fbcd1fe9ef45cebc Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Jul 2024 17:36:19 +0200 Subject: [PATCH 2/5] xdp-forward: Add the capability to load xdp_fwd_flowtable sample from userspace Introduce the capability to load the xdp-fwd-flowtable sample in order to offload the processing of the sw netfilter flowtable to XDP. Signed-off-by: Lorenzo Bianconi --- xdp-forward/Makefile | 2 +- xdp-forward/README.org | 33 +++++++++++- xdp-forward/xdp-forward.8 | 39 ++++++++++++-- xdp-forward/xdp-forward.c | 71 +++++++++++++++++++++----- xdp-forward/xdp_flowtable_sample.bpf.c | 37 ++++++++++++++ 5 files changed, 164 insertions(+), 18 deletions(-) create mode 100644 xdp-forward/xdp_flowtable_sample.bpf.c diff --git a/xdp-forward/Makefile b/xdp-forward/Makefile index 17e8374e..17aa4317 100644 --- a/xdp-forward/Makefile +++ b/xdp-forward/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -XDP_TARGETS := xdp_forward.bpf xdp_flowtable.bpf +XDP_TARGETS := xdp_forward.bpf xdp_flowtable.bpf xdp_flowtable_sample.bpf BPF_SKEL_TARGETS := $(XDP_TARGETS) XDP_OBJ_INSTALL := diff --git a/xdp-forward/README.org b/xdp-forward/README.org index d640e18c..bb2dfff8 100644 --- a/xdp-forward/README.org +++ b/xdp-forward/README.org @@ -14,8 +14,8 @@ xdp-forward is an XDP forwarding plane, which will accelerate packet forwarding using XDP. To use it, simply load it on the set of interfaces to accelerate forwarding between. The userspace component of xdp-forward will then configure and load XDP programs on those interfaces, and forward packets between them -using XDP_REDIRECT, using the kernel routing table to determine the destination -if each packet. +using XDP_REDIRECT, using the kernel routing table or netfilter flowtable to +determine the destination for each packet. Any packets that xdp-forward does not know how to forward will be passed up to the networking stack and handled by the kernel like normal.
Depending on the @@ -121,6 +121,35 @@ The =fib-direct= mode functions like =fib-full=, except it passes the policy routing rules configured will be skipped during the lookup, which can improve performance (but won't obey the policy of those rules, obviously). +** flowtable +The =flowtable= operating mode offloads netfilter sw flowtable logic in +the XDP layer if the hardware flowtable is not available. +At the moment =xdp-forward= is able to offload just TCP or UDP netfilter +flowtable entries to XDP. The user is supposed to configure the flowtable +separately. + +* Examples + +In order to enable flowtable offloading for tcp and udp traffic between NICs +n0 and n1, issue the following commands: + +#+begin_src sh +#nft -f /dev/stdin <fwd_mode) { case FWD_FIB_FULL: @@ -125,23 +144,48 @@ static int do_load(const void *cfg, __unused const char *pin_root_path) case FWD_FIB_DIRECT: opts.prog_name = "xdp_fwd_fib_direct"; break; + case FWD_FLOWTABLE: + opts.prog_name = "xdp_fwd_flowtable"; + break; default: goto end; } - skel = xdp_forward__open(); - if (!skel) { - pr_warn("Failed to load skeleton: %s\n", strerror(errno)); - goto end; + if (opt->fwd_mode == FWD_FLOWTABLE) { + struct xdp_flowtable *xdp_flowtable_skel; + + if (!sample_probe_bpf_xdp_flow_lookup()) { + pr_warn("The kernel does not support the bpf_xdp_flow_lookup() kfunc\n"); + goto end; + } + + xdp_flowtable_skel = xdp_flowtable__open(); + if (!xdp_flowtable_skel) { + pr_warn("Failed to load skeleton: %s\n", strerror(errno)); + goto end; + } + map = xdp_flowtable_skel->maps.xdp_tx_ports; + obj = xdp_flowtable_skel->obj; + skel = (void *)xdp_flowtable_skel; + } else { + struct xdp_forward *xdp_forward_skel = xdp_forward__open(); + + if (!xdp_forward_skel) { + pr_warn("Failed to load skeleton: %s\n", strerror(errno)); + goto end; + } + map = xdp_forward_skel->maps.xdp_tx_ports; + obj = xdp_forward_skel->obj; + skel = (void *)xdp_forward_skel; } /* Make sure we only load the one XDP program we are interested in 
*/ - while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL) + while ((prog = bpf_object__next_program(obj, prog)) != NULL) if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP && bpf_program__expected_attach_type(prog) == BPF_XDP) bpf_program__set_autoload(prog, false); - opts.obj = skel->obj; + opts.obj = obj; xdp_prog = xdp_program__create(&opts); if (!xdp_prog) { ret = -errno; @@ -177,8 +221,8 @@ static int do_load(const void *cfg, __unused const char *pin_root_path) goto end_detach; } - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.xdp_tx_ports), - &iface->ifindex, &iface->ifindex, 0); + ret = bpf_map_update_elem(bpf_map__fd(map), &iface->ifindex, + &iface->ifindex, 0); if (ret) { pr_warn("Failed to update devmap value: %s\n", strerror(errno)); @@ -188,7 +232,10 @@ static int do_load(const void *cfg, __unused const char *pin_root_path) } end_destroy: - xdp_forward__destroy(skel); + if (opt->fwd_mode == FWD_FLOWTABLE) + xdp_flowtable__destroy(skel); + else + xdp_forward__destroy(skel); end: return ret; diff --git a/xdp-forward/xdp_flowtable_sample.bpf.c b/xdp-forward/xdp_flowtable_sample.bpf.c new file mode 100644 index 00000000..185120a2 --- /dev/null +++ b/xdp-forward/xdp_flowtable_sample.bpf.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original xdp_fwd sample Copyright (c) 2017-18 David Ahern + */ + +#include +#include +#include +#include + +#define AF_INET 2 + +struct bpf_flowtable_opts { + __s32 error; +}; + +struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *, + struct bpf_flowtable_opts *, __u32) __ksym; + +SEC("xdp") +int xdp_fwd_flowtable_sample(struct xdp_md *ctx) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct bpf_flowtable_opts opts = {}; + struct bpf_fib_lookup tuple = { + .family = AF_INET, + .ifindex = ctx->ingress_ifindex, + }; + + tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts)); + if (!tuplehash) + return XDP_DROP; + + return XDP_PASS; +} + +char 
_license[] SEC("license") = "GPL"; From 3d59f383a3ea8826f15bf37b080156b477114809 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 14 Sep 2024 19:19:36 +0200 Subject: [PATCH 3/5] xdp-forward: Add selftest for flowtable mode Signed-off-by: Lorenzo Bianconi --- .github/workflows/selftests.yml | 2 +- lib/testing/test_runner.sh | 2 +- xdp-forward/tests/test-xdp-forward.sh | 88 +++++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/.github/workflows/selftests.yml b/.github/workflows/selftests.yml index 0b29d153..cda46f3d 100644 --- a/.github/workflows/selftests.yml +++ b/.github/workflows/selftests.yml @@ -42,7 +42,7 @@ jobs: - name: Prepare packages run: | sudo apt-get update - sudo apt-get install zstd binutils-dev elfutils libpcap-dev libelf-dev gcc-multilib pkg-config wireshark tshark bpfcc-tools python3 python3-pip python3-setuptools qemu-kvm rpm2cpio libdw-dev libdwarf-dev libcap-ng-dev + sudo apt-get install zstd binutils-dev elfutils libpcap-dev libelf-dev gcc-multilib pkg-config wireshark tshark bpfcc-tools python3 python3-pip python3-setuptools qemu-kvm rpm2cpio libdw-dev libdwarf-dev libcap-ng-dev socat - name: Prepare Clang run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - diff --git a/lib/testing/test_runner.sh b/lib/testing/test_runner.sh index 8805e38f..90c7cda7 100755 --- a/lib/testing/test_runner.sh +++ b/lib/testing/test_runner.sh @@ -25,7 +25,7 @@ ALL_TESTS="" VERBOSE_TESTS=${V:-0} NUM_NS=2 -NEEDED_TOOLS="capinfos ethtool ip ping sed tc tcpdump timeout nc tshark" +NEEDED_TOOLS="capinfos ethtool ip ping sed tc tcpdump timeout nc tshark nft socat" if [ -f "$TEST_CONFIG" ]; then source "$TEST_CONFIG" diff --git a/xdp-forward/tests/test-xdp-forward.sh b/xdp-forward/tests/test-xdp-forward.sh index 2a6988e8..f3a4b080 100644 --- a/xdp-forward/tests/test-xdp-forward.sh +++ b/xdp-forward/tests/test-xdp-forward.sh @@ -1,7 +1,6 @@ XDP_LOADER=${XDP_LOADER:-./xdp-loader} 
XDP_FORWARD=${XDP_FORWARD:-./xdp-forward} -ALL_TESTS="test_ping test_load test_fwd_full test_fwd_direct" - +ALL_TESTS="test_ping test_load test_fwd_full test_fwd_direct test_flowtable" test_ping() { @@ -52,8 +51,89 @@ test_fwd_direct() check_run $XDP_FORWARD unload ${NS_NAMES[@]} } +test_flowtable() +{ + local INPUT_FILE="${STATEDIR}/in_$$_$RANDOM" + + # veth NAPI GRO support added this symbol; forwarding won't work without it + skip_if_missing_kernel_symbol veth_set_features + + # disable {tx,rx} checksum offload since it is not currently suported + # by XDP_REDIRECT + for n in ${NS_NAMES[@]}; do + ip netns exec $n ethtool -K veth0 tx-checksumming off rx-checksumming off + ethtool -K $n tx-checksumming off rx-checksumming off + done + + # create data to send via tcp + dd if=/dev/urandom of="${INPUT_FILE}" bs=8192 count=32 status=none + + # create flowtable configuration in the main namespace + check_run nft -f /dev/stdin < in pre-routing chain + chain prerouting { + type nat hook prerouting priority filter; policy accept; + iifname == "${NS_NAMES[0]}" meta nfproto ipv4 tcp dport 12345 dnat ip to ${ALL_INSIDE_IP4[-1]}:10000 + iifname == "${NS_NAMES[0]}" meta nfproto ipv6 tcp dport 12345 dnat ip6 to [${ALL_INSIDE_IP6[-1]}]:10000 + } + # enable SNAT of the client ip via masquerading in post-routing chain + chain postrouting { + type nat hook postrouting priority filter; policy accept; + oifname "${NS_NAMES[-1]}" masquerade + } +} +table inet filter { + flowtable ft { + hook ingress priority filter + devices = { ${NS_NAMES[0]}, ${NS_NAMES[-1]} } + } + chain forward { + type filter hook forward priority filter + meta l4proto { tcp } flow add @ft + } +} +EOF + + # check if bpf flowtable lookup is available + skip_if_missing_kernel_symbol bpf_xdp_flow_lookup + + # Add some nft rules to check {dnat/snat} is done properly in + # the main namespace + check_run ip netns exec ${NS_NAMES[-1]} nft -f /dev/stdin </dev/null 2>&1 - $XDP_LOADER unload $NS --all >/dev/null 2>&1 + # 
# enable {tx,rx} checksum offload + for n in ${NS_NAMES[@]}; do + ip netns exec $n ethtool -K veth0 tx-checksumming on rx-checksumming on + ethtool -K $n tx-checksumming on rx-checksumming on + done >/dev/null 2>&1 + { + $XDP_FORWARD unload ${NS_NAMES[@]} + $XDP_LOADER unload $NS --all + check_run ip netns exec ${NS_NAMES[-1]} nft flush ruleset + check_run nft flush ruleset + } >/dev/null 2>&1 } From 9c35c0bd171328d2c256a704ae7a4767b136d6f4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 2 Oct 2024 16:21:40 +0200 Subject: [PATCH 4/5] xdp-forward: split fwd-mode into fib-mode and fwd-mode Signed-off-by: Lorenzo Bianconi --- xdp-forward/README.org | 36 ++++++++++++++++------- xdp-forward/tests/test-xdp-forward.sh | 4 +-- xdp-forward/xdp-forward.8 | 41 +++++++++++++++++++-------- xdp-forward/xdp-forward.c | 39 ++++++++++++++++--------- xdp-forward/xdp_flowtable.bpf.c | 18 ++++++++++-- 5 files changed, 97 insertions(+), 41 deletions(-) diff --git a/xdp-forward/README.org b/xdp-forward/README.org index bb2dfff8..ab30906b 100644 --- a/xdp-forward/README.org +++ b/xdp-forward/README.org @@ -56,7 +56,11 @@ Specifies which forwarding mode =xdp-forward= should operate in. Depending on the mode selected, =xdp-forward= will perform forwarding in different ways, which can lead to different behaviour, including which subset of kernel configuration (such as firewall rules) is respected during forwarding. See the -section *OPERATING MODES* below for a full description of each mode. +section *FORWARDING MODES* below for a full description of each mode. + +** -F, --fib-mode +Specifies how =xdp-forward= performs routing table lookup in the linux kernel. +See the section *FIB MODES* below for a full description of each mode. ** -m, --mode Specifies which mode to load the XDP program to be loaded in. The valid values @@ -98,12 +102,12 @@ Enable debug logging. Specify twice for even more verbosity.
** -h, --help Display a summary of the available options -* OPERATING MODES -The =xdp-forward= utility supports the following operating modes (selected by +* FORWARDING MODES +The =xdp-forward= utility supports the following forwarding modes (selected by the =--fwd-mode= parameter to =xdp-forward load=. -** fib-full (default) -In the =fib-full= operating mode, =xdp-forward= will perform a full lookup in +** fib (default) +In the =fib= forwarding mode, =xdp-forward= will perform a lookup in the kernel routing table (or FIB) for each packet, and forward packets between the configured interfaces based on the result of the lookup. Any packet where the lookup fails will be passed up to the stack. This includes packets that @@ -115,12 +119,6 @@ Note that no checks other than the FIB lookup is performed; in particular, this completely bypasses the netfilter subsystem, so firewall rules will not be checked before forwarding. -** fib-direct -The =fib-direct= mode functions like =fib-full=, except it passes the -=BPF_FIB_LOOKUP_DIRECT= flag to the FIB lookup routine. This means that any -policy routing rules configured will be skipped during the lookup, which can -improve performance (but won't obey the policy of those rules, obviously). - ** flowtable The =flowtable= operating mode offloads netfilter sw flowtable logic in the XDP layer if the hardware flowtable is not available. @@ -128,6 +126,22 @@ At the moment =xdp-forward= is able to offload just TCP or UDP netfilter flowtable entries to XDP. The user is supposed to configure the flowtable separately. +* FIB MODES +The =xdp-forward= utility supports the following fib modes (selected by +the =--fib-mode= parameter to =xdp-forward load=). + +** full (default) +In the =full= operating mode, =xdp-forward= will perform a full lookup in +the kernel routing table (or FIB) for each packet, and forward packets between +the configured interfaces based on the result of the lookup.
In particular, +it will apply any policy routing rules configured by the user. + +** direct +The =direct= mode functions like =full=, except it passes the +=BPF_FIB_LOOKUP_DIRECT= flag to the FIB lookup routine. This means that any +policy routing rules configured will be skipped during the lookup, which can +improve performance (but won't obey the policy of those rules, obviously). + * Examples In order to enable flowtable offloading for tcp and udp traffic between NICs diff --git a/xdp-forward/tests/test-xdp-forward.sh b/xdp-forward/tests/test-xdp-forward.sh index f3a4b080..68fe0a0a 100644 --- a/xdp-forward/tests/test-xdp-forward.sh +++ b/xdp-forward/tests/test-xdp-forward.sh @@ -26,7 +26,7 @@ test_fwd_full() # veth NAPI GRO support added this symbol; forwarding won't work without it skip_if_missing_kernel_symbol veth_set_features - check_run $XDP_FORWARD load -f fib-full ${NS_NAMES[@]} + check_run $XDP_FORWARD load -f fib -F full ${NS_NAMES[@]} for ip in "${ALL_INSIDE_IP4[@]}"; do check_run ns_exec ping -c 1 -W 2 $ip done @@ -41,7 +41,7 @@ test_fwd_direct() # veth NAPI GRO support added this symbol; forwarding won't work without it skip_if_missing_kernel_symbol veth_set_features - check_run $XDP_FORWARD load -f fib-direct ${NS_NAMES[@]} + check_run $XDP_FORWARD load -f fib -F direct ${NS_NAMES[@]} for ip in "${ALL_INSIDE_IP4[@]}"; do check_run ns_exec ping -c 1 -W 2 $ip done diff --git a/xdp-forward/xdp-forward.8 b/xdp-forward/xdp-forward.8 index 64ec2de6..3c8f7804 100644 --- a/xdp-forward/xdp-forward.8 +++ b/xdp-forward/xdp-forward.8 @@ -62,7 +62,12 @@ Specifies which forwarding mode \fIxdp\-forward\fP should operate in. Depending the mode selected, \fIxdp\-forward\fP will perform forwarding in different ways, which can lead to different behaviour, including which subset of kernel configuration (such as firewall rules) is respected during forwarding. See the -section \fBOPERATING MODES\fP below for a full description of each mode. 
+section \fBFORWARDING MODES\fP below for a full description of each mode. + +.SS "-F, --fib-mode " +.PP +Specifies how \fIxdp\-forward\fP performs routing table lookup in the linux kernel. +See the section \fBFIB MODES\fP below for a full description of each mode. .SS "-m, --mode " .PP @@ -114,14 +119,14 @@ Enable debug logging. Specify twice for even more verbosity. .PP Display a summary of the available options -.SH "OPERATING MODES" +.SH "FORWARDING MODES" .PP -The \fIxdp\-forward\fP utility supports the following operating modes (selected by +The \fIxdp\-forward\fP utility supports the following forwarding modes (selected by the \fI\-\-fwd\-mode\fP parameter to \fIxdp\-forward load\fP. -.SS "fib-full (default)" +.SS "fib (default)" .PP -In the \fIfib\-full\fP operating mode, \fIxdp\-forward\fP will perform a full lookup in +In the \fIfib\fP forwarding mode, \fIxdp\-forward\fP will perform a lookup in the kernel routing table (or FIB) for each packet, and forward packets between the configured interfaces based on the result of the lookup. Any packet where the lookup fails will be passed up to the stack. This includes packets that @@ -134,13 +139,6 @@ Note that no checks other than the FIB lookup is performed; in particular, this completely bypasses the netfilter subsystem, so firewall rules will not be checked before forwarding. -.SS "fib-direct" -.PP -The \fIfib\-direct\fP mode functions like \fIfib\-full\fP, except it passes the -\fIBPF_FIB_LOOKUP_DIRECT\fP flag to the FIB lookup routine. This means that any -policy routing rules configured will be skipped during the lookup, which can -improve performance (but won't obey the policy of those rules, obviously). - .SS "flowtable" .PP The \fIflowtable\fP operating mode offloads netfilter sw flowtable logic in @@ -149,6 +147,25 @@ At the moment \fIxdp\-forward\fP is able to offload just TCP or UDP netfilter flowtable entries to XDP. The user is supposed to configure the flowtable separately. 
+.SH "FIB MODES" +.PP +The \fIxdp\-forward\fP utility supports the following fib modes (selected by +the \fI\-\-fib\-mode\fP parameter to \fIxdp\-forward load\fP). + +.SS "full (default)" +.PP +In the \fIfull\fP operating mode, \fIxdp\-forward\fP will perform a full lookup in +the kernel routing table (or FIB) for each packet, and forward packets between +the configured interfaces based on the result of the lookup. In particular, +it will apply any policy routing rules configured by the user. + +.SS "direct" +.PP +The \fIdirect\fP mode functions like \fIfull\fP, except it passes the +\fIBPF_FIB_LOOKUP_DIRECT\fP flag to the FIB lookup routine. This means that any +policy routing rules configured will be skipped during the lookup, which can +improve performance (but won't obey the policy of those rules, obviously). + .SH "Examples" .PP In order to enable flowtable offloading for tcp and udp traffic between NICs diff --git a/xdp-forward/xdp-forward.c b/xdp-forward/xdp-forward.c index c44e5c66..5549e6f4 100644 --- a/xdp-forward/xdp-forward.c +++ b/xdp-forward/xdp-forward.c @@ -37,16 +37,23 @@ struct enum_val xdp_modes[] = { { "native", XDP_MODE_NATIVE }, { NULL, 0 } }; enum fwd_mode { - FWD_FIB_DIRECT, - FWD_FIB_FULL, + FWD_FIB, FWD_FLOWTABLE, }; -struct enum_val fwd_modes[] = { { "fib-direct", FWD_FIB_DIRECT }, - { "fib-full", FWD_FIB_FULL }, +struct enum_val fwd_modes[] = { { "fib", FWD_FIB }, { "flowtable", FWD_FLOWTABLE }, { NULL, 0 } }; +enum fib_mode { + FIB_DIRECT, + FIB_FULL, +}; + +struct enum_val fib_modes[] = { { "direct", FIB_DIRECT }, + { "full", FIB_FULL }, + { NULL, 0 } }; + static int find_prog(struct iface *iface, bool detach) { struct xdp_program *prog = NULL; @@ -88,16 +95,22 @@ static int find_prog(struct iface *iface, bool detach) struct load_opts { enum fwd_mode fwd_mode; + enum fib_mode fib_mode; enum xdp_attach_mode xdp_mode; struct iface *ifaces; -} defaults_load = { .fwd_mode = FWD_FIB_FULL }; +} defaults_load = { .fwd_mode = FWD_FIB, .fib_mode 
= FIB_FULL, }; struct prog_option load_options[] = { DEFINE_OPTION("fwd-mode", OPT_ENUM, struct load_opts, fwd_mode, .short_opt = 'f', .typearg = fwd_modes, - .metavar = "", - .help = "Forward mode to run in; see man page. Default fib-full"), + .metavar = "", + .help = "Forward mode to run in; see man page. Default fib"), + DEFINE_OPTION("fib-mode", OPT_ENUM, struct load_opts, fib_mode, + .short_opt = 'F', + .typearg = fib_modes, + .metavar = "", + .help = "Fib mode to run in; see man page. Default full"), DEFINE_OPTION("xdp-mode", OPT_ENUM, struct load_opts, xdp_mode, .short_opt = 'm', .typearg = xdp_modes, @@ -138,14 +151,14 @@ static int do_load(const void *cfg, __unused const char *pin_root_path) void *skel; switch (opt->fwd_mode) { - case FWD_FIB_FULL: - opts.prog_name = "xdp_fwd_fib_full"; - break; - case FWD_FIB_DIRECT: - opts.prog_name = "xdp_fwd_fib_direct"; + case FWD_FIB: + opts.prog_name = opt->fib_mode == FIB_DIRECT + ? "xdp_fwd_fib_direct" : "xdp_fwd_fib_full"; break; case FWD_FLOWTABLE: - opts.prog_name = "xdp_fwd_flowtable"; + opts.prog_name = opt->fib_mode == FIB_DIRECT + ? 
"xdp_fwd_flowtable_direct" + : "xdp_fwd_flowtable_full"; break; default: goto end; diff --git a/xdp-forward/xdp_flowtable.bpf.c b/xdp-forward/xdp_flowtable.bpf.c index 32f627d7..192c50b6 100644 --- a/xdp-forward/xdp_flowtable.bpf.c +++ b/xdp-forward/xdp_flowtable.bpf.c @@ -478,8 +478,8 @@ xdp_flowtable_forward_ipv6(const struct flow_offload *flow, void *data, ip6h->hop_limit--; } -SEC("xdp") -int xdp_fwd_flowtable(struct xdp_md *ctx) +static __always_inline int xdp_flowtable_flags(struct xdp_md *ctx, + __u32 fib_flags) { void *data_end = (void *)(long)ctx->data_end; struct flow_offload_tuple_rhash *tuplehash; @@ -585,7 +585,7 @@ int xdp_fwd_flowtable(struct xdp_md *ctx) xdp_flowtable_get_dnat_ip(&tuple.ipv4_dst, flow, dir); } - if (bpf_fib_lookup(ctx, &tuple, sizeof(tuple), 0) != + if (bpf_fib_lookup(ctx, &tuple, sizeof(tuple), fib_flags) != BPF_FIB_LKUP_RET_SUCCESS) return XDP_PASS; @@ -606,4 +606,16 @@ int xdp_fwd_flowtable(struct xdp_md *ctx) return bpf_redirect_map(&xdp_tx_ports, tuple.ifindex, 0); } +SEC("xdp") +int xdp_fwd_flowtable_full(struct xdp_md *ctx) +{ + return xdp_flowtable_flags(ctx, 0); +} + +SEC("xdp") +int xdp_fwd_flowtable_direct(struct xdp_md *ctx) +{ + return xdp_flowtable_flags(ctx, BPF_FIB_LOOKUP_DIRECT); +} + char _license[] SEC("license") = "GPL"; From fdf6536d06fac45805aeea71d98cfaea55fd598e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 11 Oct 2024 11:34:45 +0200 Subject: [PATCH 5/5] CI: add 6.12 kernel version Signed-off-by: Lorenzo Bianconi --- .github/workflows/selftests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/selftests.yml b/.github/workflows/selftests.yml index cda46f3d..94b95920 100644 --- a/.github/workflows/selftests.yml +++ b/.github/workflows/selftests.yml @@ -12,6 +12,7 @@ jobs: strategy: matrix: KERNEL_VERSION: + - "6.12.0-0.rc2.24.fc42" - "6.10.1-200.fc40" - "6.6.14-200.fc39" - "6.1.9-200.fc37"