Commit 2613af0

Michael Dalton authored and davem330 committed
virtio_net: migrate mergeable rx buffers to page frag allocators
The virtio_net driver's mergeable receive buffer allocator uses 4KB packet
buffers. For MTU-sized traffic, SKB truesize is > 4KB but only ~1500 bytes
of the buffer is used to store packet data, reducing the effective TCP
window size substantially.

This patch addresses the performance concerns with mergeable receive
buffers by allocating MTU-sized packet buffers using page frag allocators.
If more than MAX_SKB_FRAGS buffers are needed, the SKB frag_list is used.

Signed-off-by: Michael Dalton <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 5d9efa7 commit 2613af0
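
For context, the new allocation scheme behaves roughly as in the sketch below: a simplified, self-contained rendering of the patch's add_recvbuf_mergeable() logic, not the verbatim driver code. The helper name alloc_mergeable_buf is illustrative; skb_page_frag_refill(), netdev_alloc_frag(), and MAX_PACKET_LEN are the real symbols the patch uses. (With 4KB buffers, a ~1500-byte frame is charged at least 4096 bytes of truesize, so roughly 60% of the accounted receive memory carries no payload.)

/* Illustrative sketch: carve MAX_PACKET_LEN-sized receive buffers out of
 * a shared page frag instead of dedicating a full 4KB page per buffer.
 */
static char *alloc_mergeable_buf(struct page_frag *frag, gfp_t gfp)
{
        char *buf = NULL;

        if (gfp & __GFP_WAIT) {
                /* May sleep: refill the driver-owned page_frag. */
                if (skb_page_frag_refill(MAX_PACKET_LEN, frag, gfp)) {
                        buf = (char *)page_address(frag->page) + frag->offset;
                        get_page(frag->page);   /* one page ref per buffer */
                        frag->offset += MAX_PACKET_LEN;
                }
        } else {
                /* Atomic context: use the per-CPU netdev frag cache. */
                buf = netdev_alloc_frag(MAX_PACKET_LEN);
        }
        return buf;     /* NULL on allocation failure */
}

Each buffer takes its own reference on the backing page, which is why the rest of the patch frees mergeable buffers with put_page(virt_to_head_page(buf)) instead of give_pages().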

File tree

1 file changed: +106, -58 lines changed


drivers/net/virtio_net.c

Lines changed: 106 additions & 58 deletions
@@ -124,6 +124,11 @@ struct virtnet_info {
         /* Lock for config space updates */
         struct mutex config_lock;
 
+        /* Page_frag for GFP_KERNEL packet buffer allocation when we run
+         * low on memory.
+         */
+        struct page_frag alloc_frag;
+
         /* Does the affinity hint is set for virtqueues? */
         bool affinity_hint_set;
 
@@ -217,33 +222,18 @@ static void skb_xmit_done(struct virtqueue *vq)
         netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static void set_skb_frag(struct sk_buff *skb, struct page *page,
-                         unsigned int offset, unsigned int *len)
-{
-        int size = min((unsigned)PAGE_SIZE - offset, *len);
-        int i = skb_shinfo(skb)->nr_frags;
-
-        __skb_fill_page_desc(skb, i, page, offset, size);
-
-        skb->data_len += size;
-        skb->len += size;
-        skb->truesize += PAGE_SIZE;
-        skb_shinfo(skb)->nr_frags++;
-        skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-        *len -= size;
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
-                                   struct page *page, unsigned int len)
+                                   struct page *page, unsigned int offset,
+                                   unsigned int len, unsigned int truesize)
 {
         struct virtnet_info *vi = rq->vq->vdev->priv;
         struct sk_buff *skb;
         struct skb_vnet_hdr *hdr;
-        unsigned int copy, hdr_len, offset;
+        unsigned int copy, hdr_len, hdr_padded_len;
         char *p;
 
-        p = page_address(page);
+        p = page_address(page) + offset;
 
         /* copy small packet so we can reuse these pages for small data */
         skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
@@ -254,16 +244,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 
         if (vi->mergeable_rx_bufs) {
                 hdr_len = sizeof hdr->mhdr;
-                offset = hdr_len;
+                hdr_padded_len = sizeof hdr->mhdr;
         } else {
                 hdr_len = sizeof hdr->hdr;
-                offset = sizeof(struct padded_vnet_hdr);
+                hdr_padded_len = sizeof(struct padded_vnet_hdr);
         }
 
         memcpy(hdr, p, hdr_len);
 
         len -= hdr_len;
-        p += offset;
+        offset += hdr_padded_len;
+        p += hdr_padded_len;
 
         copy = len;
         if (copy > skb_tailroom(skb))
@@ -273,6 +264,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
         len -= copy;
         offset += copy;
 
+        if (vi->mergeable_rx_bufs) {
+                if (len)
+                        skb_add_rx_frag(skb, 0, page, offset, len, truesize);
+                else
+                        put_page(page);
+                return skb;
+        }
+
         /*
          * Verify that we can indeed put this data into a skb.
          * This is here to handle cases when the device erroneously
@@ -284,9 +283,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
                 dev_kfree_skb(skb);
                 return NULL;
         }
-
+        BUG_ON(offset >= PAGE_SIZE);
         while (len) {
-                set_skb_frag(skb, page, offset, &len);
+                unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
+                                frag_size, truesize);
+                len -= frag_size;
                 page = (struct page *)page->private;
                 offset = 0;
         }
@@ -297,33 +299,52 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
         return skb;
 }
 
-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
 {
-        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+        struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
+        struct sk_buff *curr_skb = head_skb;
+        char *buf;
         struct page *page;
-        int num_buf, i, len;
+        int num_buf, len;
 
         num_buf = hdr->mhdr.num_buffers;
         while (--num_buf) {
-                i = skb_shinfo(skb)->nr_frags;
-                if (i >= MAX_SKB_FRAGS) {
-                        pr_debug("%s: packet too long\n", skb->dev->name);
-                        skb->dev->stats.rx_length_errors++;
-                        return -EINVAL;
-                }
-                page = virtqueue_get_buf(rq->vq, &len);
-                if (!page) {
+                int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
+                buf = virtqueue_get_buf(rq->vq, &len);
+                if (unlikely(!buf)) {
                         pr_debug("%s: rx error: %d buffers missing\n",
-                                 skb->dev->name, hdr->mhdr.num_buffers);
-                        skb->dev->stats.rx_length_errors++;
+                                 head_skb->dev->name, hdr->mhdr.num_buffers);
+                        head_skb->dev->stats.rx_length_errors++;
                         return -EINVAL;
                 }
-
-                if (len > PAGE_SIZE)
-                        len = PAGE_SIZE;
-
-                set_skb_frag(skb, page, 0, &len);
-
+                if (unlikely(len > MAX_PACKET_LEN)) {
+                        pr_debug("%s: rx error: merge buffer too long\n",
+                                 head_skb->dev->name);
+                        len = MAX_PACKET_LEN;
+                }
+                if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
+                        struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
+                        if (unlikely(!nskb)) {
+                                head_skb->dev->stats.rx_dropped++;
+                                return -ENOMEM;
+                        }
+                        if (curr_skb == head_skb)
+                                skb_shinfo(curr_skb)->frag_list = nskb;
+                        else
+                                curr_skb->next = nskb;
+                        curr_skb = nskb;
+                        head_skb->truesize += nskb->truesize;
+                        num_skb_frags = 0;
+                }
+                if (curr_skb != head_skb) {
+                        head_skb->data_len += len;
+                        head_skb->len += len;
+                        head_skb->truesize += MAX_PACKET_LEN;
+                }
+                page = virt_to_head_page(buf);
+                skb_add_rx_frag(curr_skb, num_skb_frags, page,
+                                buf - (char *)page_address(page), len,
+                                MAX_PACKET_LEN);
                 --rq->num;
         }
         return 0;
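
One subtlety in the hunk above: skb_add_rx_frag() updates len, data_len, and truesize only on the skb it is passed, so once fragments start landing on skbs chained via frag_list, the head skb's packet totals must be maintained by hand. Condensed from the code above:

        /* Fragments beyond MAX_SKB_FRAGS live on skbs chained off
         * head_skb->frag_list, but head_skb still reports the totals.
         */
        if (curr_skb != head_skb) {
                head_skb->data_len += len;              /* payload bytes */
                head_skb->len += len;
                head_skb->truesize += MAX_PACKET_LEN;   /* memory accounting */
        }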
@@ -341,8 +362,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
         if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                 pr_debug("%s: short packet %i\n", dev->name, len);
                 dev->stats.rx_length_errors++;
-                if (vi->mergeable_rx_bufs || vi->big_packets)
+                if (vi->big_packets)
                         give_pages(rq, buf);
+                else if (vi->mergeable_rx_bufs)
+                        put_page(virt_to_head_page(buf));
                 else
                         dev_kfree_skb(buf);
                 return;
@@ -352,19 +375,28 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                 skb = buf;
                 len -= sizeof(struct virtio_net_hdr);
                 skb_trim(skb, len);
+        } else if (vi->mergeable_rx_bufs) {
+                struct page *page = virt_to_head_page(buf);
+                skb = page_to_skb(rq, page,
+                                  (char *)buf - (char *)page_address(page),
+                                  len, MAX_PACKET_LEN);
+                if (unlikely(!skb)) {
+                        dev->stats.rx_dropped++;
+                        put_page(page);
+                        return;
+                }
+                if (receive_mergeable(rq, skb)) {
+                        dev_kfree_skb(skb);
+                        return;
+                }
         } else {
                 page = buf;
-                skb = page_to_skb(rq, page, len);
+                skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
                 if (unlikely(!skb)) {
                         dev->stats.rx_dropped++;
                         give_pages(rq, page);
                         return;
                 }
-                if (vi->mergeable_rx_bufs)
-                        if (receive_mergeable(rq, skb)) {
-                                dev_kfree_skb(skb);
-                                return;
-                        }
         }
 
         hdr = skb_vnet_hdr(skb);
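
Since mergeable buffers are now raw frag pointers rather than struct page pointers, receive_buf() first recovers the backing page and the in-page offset from the pointer itself, restated minimally:

        struct page *page = virt_to_head_page(buf);  /* frag -> compound head */
        unsigned int offset = (char *)buf - (char *)page_address(page);

page_to_skb() then copies the virtio-net header out of the frag and attaches the remainder as a page fragment accounted at MAX_PACKET_LEN truesize.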
@@ -501,18 +533,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-        struct page *page;
+        struct virtnet_info *vi = rq->vq->vdev->priv;
+        char *buf = NULL;
         int err;
 
-        page = get_a_page(rq, gfp);
-        if (!page)
+        if (gfp & __GFP_WAIT) {
+                if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag,
+                                         gfp)) {
+                        buf = (char *)page_address(vi->alloc_frag.page) +
+                              vi->alloc_frag.offset;
+                        get_page(vi->alloc_frag.page);
+                        vi->alloc_frag.offset += MAX_PACKET_LEN;
+                }
+        } else {
+                buf = netdev_alloc_frag(MAX_PACKET_LEN);
+        }
+        if (!buf)
                 return -ENOMEM;
 
-        sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
-
-        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
+        sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
+        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
         if (err < 0)
-                give_pages(rq, page);
+                put_page(virt_to_head_page(buf));
 
         return err;
 }
@@ -1343,8 +1385,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
                 struct virtqueue *vq = vi->rq[i].vq;
 
                 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                        if (vi->mergeable_rx_bufs || vi->big_packets)
+                        if (vi->big_packets)
                                 give_pages(&vi->rq[i], buf);
+                        else if (vi->mergeable_rx_bufs)
+                                put_page(virt_to_head_page(buf));
                         else
                                 dev_kfree_skb(buf);
                         --vi->rq[i].num;
@@ -1650,6 +1694,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 free_vqs:
         cancel_delayed_work_sync(&vi->refill);
         virtnet_del_vqs(vi);
+        if (vi->alloc_frag.page)
+                put_page(vi->alloc_frag.page);
 free_index:
         free_percpu(vi->vq_index);
 free_stats:
@@ -1685,6 +1731,8 @@ static void virtnet_remove(struct virtio_device *vdev)
         unregister_netdev(vi->dev);
 
         remove_vq_common(vi);
+        if (vi->alloc_frag.page)
+                put_page(vi->alloc_frag.page);
 
         flush_work(&vi->config_work);
 