Skip to content

Commit 299603e

Browse files
hkchu authored and davem330 committed
net-gro: Prepare GRO stack for the upcoming tunneling support
This patch modifies the GRO stack to avoid the use of "network_header" and associated macros like ip_hdr() and ipv6_hdr() in order to allow an arbitrary number of IP hdrs (v4 or v6) to be used in the encapsulation chain. This lays the foundation for various IP tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later. With this patch, the GRO stack traversing now is mostly based on skb_gro_offset rather than special hdr offsets saved in skb (e.g., skb->network_header). As a result all but the top layer (i.e., the transport layer) must have hdrs of the same length in order for a pkt to be considered for aggregation. Therefore when adding a new encap layer (e.g., for tunneling), one must check and skip flows (e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a different hdr length. Note that unlike the network header, the transport header can and will continue to be set by the GRO code since there will be at most one "transport layer" in the encap chain. Signed-off-by: H.K. Jerry Chu <[email protected]> Suggested-by: Eric Dumazet <[email protected]> Reviewed-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent a46dc74 commit 299603e

File tree

6 files changed

+97
-75
lines changed

6 files changed

+97
-75
lines changed

include/linux/netdevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1676,7 +1676,7 @@ struct offload_callbacks {
16761676
int (*gso_send_check)(struct sk_buff *skb);
16771677
struct sk_buff **(*gro_receive)(struct sk_buff **head,
16781678
struct sk_buff *skb);
1679-
int (*gro_complete)(struct sk_buff *skb);
1679+
int (*gro_complete)(struct sk_buff *skb, int nhoff);
16801680
};
16811681

16821682
struct packet_offload {

net/core/dev.c

Lines changed: 25 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3752,7 +3752,7 @@ static int napi_gro_complete(struct sk_buff *skb)
37523752
if (ptype->type != type || !ptype->callbacks.gro_complete)
37533753
continue;
37543754

3755-
err = ptype->callbacks.gro_complete(skb);
3755+
err = ptype->callbacks.gro_complete(skb, 0);
37563756
break;
37573757
}
37583758
rcu_read_unlock();
@@ -3818,6 +3818,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
38183818
}
38193819
}
38203820

3821+
static void skb_gro_reset_offset(struct sk_buff *skb)
3822+
{
3823+
const struct skb_shared_info *pinfo = skb_shinfo(skb);
3824+
const skb_frag_t *frag0 = &pinfo->frags[0];
3825+
3826+
NAPI_GRO_CB(skb)->data_offset = 0;
3827+
NAPI_GRO_CB(skb)->frag0 = NULL;
3828+
NAPI_GRO_CB(skb)->frag0_len = 0;
3829+
3830+
if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3831+
pinfo->nr_frags &&
3832+
!PageHighMem(skb_frag_page(frag0))) {
3833+
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3834+
NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3835+
}
3836+
}
3837+
38213838
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
38223839
{
38233840
struct sk_buff **pp = NULL;
@@ -3833,6 +3850,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
38333850
if (skb_is_gso(skb) || skb_has_frag_list(skb))
38343851
goto normal;
38353852

3853+
skb_gro_reset_offset(skb);
38363854
gro_list_prepare(napi, skb);
38373855

38383856
rcu_read_lock();
@@ -3938,27 +3956,8 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
39383956
return ret;
39393957
}
39403958

3941-
static void skb_gro_reset_offset(struct sk_buff *skb)
3942-
{
3943-
const struct skb_shared_info *pinfo = skb_shinfo(skb);
3944-
const skb_frag_t *frag0 = &pinfo->frags[0];
3945-
3946-
NAPI_GRO_CB(skb)->data_offset = 0;
3947-
NAPI_GRO_CB(skb)->frag0 = NULL;
3948-
NAPI_GRO_CB(skb)->frag0_len = 0;
3949-
3950-
if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3951-
pinfo->nr_frags &&
3952-
!PageHighMem(skb_frag_page(frag0))) {
3953-
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3954-
NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3955-
}
3956-
}
3957-
39583959
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
39593960
{
3960-
skb_gro_reset_offset(skb);
3961-
39623961
return napi_skb_finish(dev_gro_receive(napi, skb), skb);
39633962
}
39643963
EXPORT_SYMBOL(napi_gro_receive);
@@ -3992,12 +3991,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
39923991
{
39933992
switch (ret) {
39943993
case GRO_NORMAL:
3995-
case GRO_HELD:
3996-
skb->protocol = eth_type_trans(skb, skb->dev);
3997-
3998-
if (ret == GRO_HELD)
3999-
skb_gro_pull(skb, -ETH_HLEN);
4000-
else if (netif_receive_skb(skb))
3994+
if (netif_receive_skb(skb))
40013995
ret = GRO_DROP;
40023996
break;
40033997

@@ -4006,6 +4000,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
40064000
napi_reuse_skb(napi, skb);
40074001
break;
40084002

4003+
case GRO_HELD:
40094004
case GRO_MERGED:
40104005
break;
40114006
}
@@ -4016,36 +4011,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
40164011
static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
40174012
{
40184013
struct sk_buff *skb = napi->skb;
4019-
struct ethhdr *eth;
4020-
unsigned int hlen;
4021-
unsigned int off;
40224014

40234015
napi->skb = NULL;
40244016

4025-
skb_reset_mac_header(skb);
4026-
skb_gro_reset_offset(skb);
4027-
4028-
off = skb_gro_offset(skb);
4029-
hlen = off + sizeof(*eth);
4030-
eth = skb_gro_header_fast(skb, off);
4031-
if (skb_gro_header_hard(skb, hlen)) {
4032-
eth = skb_gro_header_slow(skb, hlen, off);
4033-
if (unlikely(!eth)) {
4034-
napi_reuse_skb(napi, skb);
4035-
skb = NULL;
4036-
goto out;
4037-
}
4017+
if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
4018+
napi_reuse_skb(napi, skb);
4019+
return NULL;
40384020
}
4021+
skb->protocol = eth_type_trans(skb, skb->dev);
40394022

4040-
skb_gro_pull(skb, sizeof(*eth));
4041-
4042-
/*
4043-
* This works because the only protocols we care about don't require
4044-
* special handling. We'll fix it up properly at the end.
4045-
*/
4046-
skb->protocol = eth->h_proto;
4047-
4048-
out:
40494023
return skb;
40504024
}
40514025

net/ipv4/af_inet.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,8 +1377,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
13771377
if (!NAPI_GRO_CB(p)->same_flow)
13781378
continue;
13791379

1380-
iph2 = ip_hdr(p);
1381-
1380+
iph2 = (struct iphdr *)(p->data + off);
1381+
/* The above works because, with the exception of the top
1382+
* (inner most) layer, we only aggregate pkts with the same
1383+
* hdr length so all the hdrs we'll need to verify will start
1384+
* at the same offset.
1385+
*/
13821386
if ((iph->protocol ^ iph2->protocol) |
13831387
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
13841388
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
@@ -1397,6 +1401,11 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
13971401
}
13981402

13991403
NAPI_GRO_CB(skb)->flush |= flush;
1404+
skb_set_network_header(skb, off);
1405+
/* The above will be needed by the transport layer if there is one
1406+
* immediately following this IP hdr.
1407+
*/
1408+
14001409
skb_gro_pull(skb, sizeof(*iph));
14011410
skb_set_transport_header(skb, skb_gro_offset(skb));
14021411

@@ -1411,10 +1420,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
14111420
return pp;
14121421
}
14131422

1414-
static int inet_gro_complete(struct sk_buff *skb)
1423+
static int inet_gro_complete(struct sk_buff *skb, int nhoff)
14151424
{
1416-
__be16 newlen = htons(skb->len - skb_network_offset(skb));
1417-
struct iphdr *iph = ip_hdr(skb);
1425+
__be16 newlen = htons(skb->len - nhoff);
1426+
struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
14181427
const struct net_offload *ops;
14191428
int proto = iph->protocol;
14201429
int err = -ENOSYS;
@@ -1427,7 +1436,11 @@ static int inet_gro_complete(struct sk_buff *skb)
14271436
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
14281437
goto out_unlock;
14291438

1430-
err = ops->callbacks.gro_complete(skb);
1439+
/* Only need to add sizeof(*iph) to get to the next hdr below
1440+
* because any hdr with option will have been flushed in
1441+
* inet_gro_receive().
1442+
*/
1443+
err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
14311444

14321445
out_unlock:
14331446
rcu_read_unlock();

net/ipv4/tcp_offload.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ int tcp_gro_complete(struct sk_buff *skb)
240240
{
241241
struct tcphdr *th = tcp_hdr(skb);
242242

243-
skb->csum_start = skb_transport_header(skb) - skb->head;
243+
skb->csum_start = (unsigned char *)th - skb->head;
244244
skb->csum_offset = offsetof(struct tcphdr, check);
245245
skb->ip_summed = CHECKSUM_PARTIAL;
246246

@@ -272,6 +272,7 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb)
272272

273273
static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
274274
{
275+
/* Use the IP hdr immediately proceeding for this transport */
275276
const struct iphdr *iph = skb_gro_network_header(skb);
276277
__wsum wsum;
277278

@@ -303,13 +304,13 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *
303304
return tcp_gro_receive(head, skb);
304305
}
305306

306-
static int tcp4_gro_complete(struct sk_buff *skb)
307+
static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
307308
{
308309
const struct iphdr *iph = ip_hdr(skb);
309310
struct tcphdr *th = tcp_hdr(skb);
310311

311-
th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
312-
iph->saddr, iph->daddr, 0);
312+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
313+
iph->daddr, 0);
313314
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
314315

315316
return tcp_gro_complete(skb);

net/ipv6/ip6_offload.c

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,35 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
154154
return segs;
155155
}
156156

157+
/* Return the total length of all the extension hdrs, following the same
158+
* logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
159+
*/
160+
static int ipv6_exthdrs_len(struct ipv6hdr *iph,
161+
const struct net_offload **opps)
162+
{
163+
struct ipv6_opt_hdr *opth = NULL;
164+
int len = 0, proto, optlen;
165+
166+
proto = iph->nexthdr;
167+
for (;;) {
168+
if (proto != NEXTHDR_HOP) {
169+
*opps = rcu_dereference(inet6_offloads[proto]);
170+
if (unlikely(!(*opps)))
171+
break;
172+
if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
173+
break;
174+
}
175+
if (opth == NULL)
176+
opth = (void *)(iph+1);
177+
else
178+
opth = (void *)opth + optlen;
179+
optlen = ipv6_optlen(opth);
180+
len += optlen;
181+
proto = opth->nexthdr;
182+
}
183+
return len;
184+
}
185+
157186
static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
158187
struct sk_buff *skb)
159188
{
@@ -177,6 +206,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
177206
goto out;
178207
}
179208

209+
skb_set_network_header(skb, off);
180210
skb_gro_pull(skb, sizeof(*iph));
181211
skb_set_transport_header(skb, skb_gro_offset(skb));
182212

@@ -211,12 +241,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
211241
if (!NAPI_GRO_CB(p)->same_flow)
212242
continue;
213243

214-
iph2 = ipv6_hdr(p);
244+
iph2 = (struct ipv6hdr *)(p->data + off);
215245
first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
216246

217-
/* All fields must match except length and Traffic Class. */
218-
if (nlen != skb_network_header_len(p) ||
219-
(first_word & htonl(0xF00FFFFF)) ||
247+
/* All fields must match except length and Traffic Class.
248+
* XXX skbs on the gro_list have all been parsed and pulled
249+
* already so we don't need to compare nlen
250+
* (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
251+
* memcmp() alone below is suffcient, right?
252+
*/
253+
if ((first_word & htonl(0xF00FFFFF)) ||
220254
memcmp(&iph->nexthdr, &iph2->nexthdr,
221255
nlen - offsetof(struct ipv6hdr, nexthdr))) {
222256
NAPI_GRO_CB(p)->same_flow = 0;
@@ -245,21 +279,21 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
245279
return pp;
246280
}
247281

248-
static int ipv6_gro_complete(struct sk_buff *skb)
282+
static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
249283
{
250284
const struct net_offload *ops;
251-
struct ipv6hdr *iph = ipv6_hdr(skb);
285+
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
252286
int err = -ENOSYS;
253287

254-
iph->payload_len = htons(skb->len - skb_network_offset(skb) -
255-
sizeof(*iph));
288+
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
256289

257290
rcu_read_lock();
258-
ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
291+
292+
nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
259293
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
260294
goto out_unlock;
261295

262-
err = ops->callbacks.gro_complete(skb);
296+
err = ops->callbacks.gro_complete(skb, nhoff);
263297

264298
out_unlock:
265299
rcu_read_unlock();

net/ipv6/tcpv6_offload.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
6666
return tcp_gro_receive(head, skb);
6767
}
6868

69-
static int tcp6_gro_complete(struct sk_buff *skb)
69+
static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
7070
{
7171
const struct ipv6hdr *iph = ipv6_hdr(skb);
7272
struct tcphdr *th = tcp_hdr(skb);
7373

74-
th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
75-
&iph->saddr, &iph->daddr, 0);
74+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
75+
&iph->daddr, 0);
7676
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
7777

7878
return tcp_gro_complete(skb);

0 commit comments

Comments
 (0)