From f23bc46c30ca5ef58b8549434899fcbac41b2cfc Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 15 Dec 2016 12:12:54 -0800
Subject: net: xdp: add invalid buffer warning

This adds a warning for drivers to use when they encounter an invalid
buffer for XDP. In normal cases this should not happen, but a standard
warning is useful for catching unexpected behaviour from the emulation
layer in virtual/qemu setups.

Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 include/linux/filter.h | 1 +
 net/core/filter.c      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6a1658308612..af8a1804cac6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -602,6 +602,7 @@ bool bpf_helper_changes_pkt_data(void *func);
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_buffer(void);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/net/core/filter.c b/net/core/filter.c
index b1461708a977..7190bd648154 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2972,6 +2972,12 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_buffer(void)
+{
+	WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
+
 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 					int src_reg, int ctx_off,
 					struct bpf_insn *insn_buf,
--
cgit v1.2.3
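The helper above is meant to be called from a driver receive path when it finds a frame it cannot hand to an attached XDP program. A minimal sketch of that intended call site follows; the helper name mydrv_xdp_buffer_ok() and its caller are hypothetical, used only to illustrate the pattern, and are not part of this series.

	#include <linux/filter.h>	/* bpf_warn_invalid_xdp_buffer() */

	/* Hypothetical driver helper: decide whether a received frame can
	 * be handed to XDP; warn once and tell the caller to drop it if not.
	 */
	static bool mydrv_xdp_buffer_ok(unsigned int nr_bufs)
	{
		if (unlikely(nr_bufs > 1)) {
			bpf_warn_invalid_xdp_buffer();
			return false;	/* caller drops the frame */
		}
		return true;
	}

Because the message is emitted with WARN_ONCE(), a misbehaving backend produces a single splat rather than flooding the log on every frame.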
From f600b690501550b94e83e07295d9c8b9c4c39f4e Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 15 Dec 2016 12:13:24 -0800
Subject: virtio_net: Add XDP support

This adds XDP support to virtio_net. Some requirements must be met for
XDP to be enabled depending on the mode. First it will only be
supported with LRO disabled so that data is not pushed across multiple
buffers. Second the MTU must be less than a page size to avoid having
to handle XDP across multiple pages.

If mergeable receive is enabled this patch only supports the case where
header and data are in the same buf, which we can check when a packet
is received by looking at num_buf. If num_buf is greater than 1 and an
XDP program is loaded the packet is dropped and a warning is thrown.
When any_header_sg is set this does not happen and both header and data
are put in a single buffer as expected, so we check for this when XDP
programs are loaded. Subsequent patches will process the packet in a
degraded mode to ensure connectivity and correctness are not lost even
if the backend pushes packets into multiple buffers.

If big packets mode is enabled and the MTU/LRO conditions above are met
then XDP is allowed.

This patch was tested with qemu with vhost=on and vhost=off where
mergeable and big_packet modes were forced via hard coding feature
negotiation. Multiple buffers per packet were forced via a small test
patch to vhost.c in the vhost=on qemu mode.

Suggested-by: Shrijeet Mukherjee
Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 171 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fe9f7726e990..6fdb6065b940 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
+#include <linux/bpf.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	struct bpf_prog __rcu *xdp_prog;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -324,6 +327,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct bpf_prog *xdp_prog,
+		       struct page *page, int offset, int len)
+{
+	int hdr_padded_len;
+	struct xdp_buff xdp;
+	u32 act;
+	u8 *buf;
+
+	buf = page_address(page) + offset;
+
+	if (vi->mergeable_rx_bufs)
+		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+	xdp.data = buf + hdr_padded_len;
+	xdp.data_end = xdp.data + (len - vi->hdr_len);
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return XDP_PASS;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_TX:
+	case XDP_ABORTED:
+	case XDP_DROP:
+		return XDP_DROP;
+	}
+}
+
 static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
@@ -340,14 +375,32 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
+	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb;
 
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+		u32 act;
+
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
+
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
+err_xdp:
+	rcu_read_unlock();
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
@@ -365,11 +418,42 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	struct sk_buff *head_skb, *curr_skb;
+	struct bpf_prog *xdp_prog;
+	unsigned int truesize;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act;
+
+		/* No known backend devices should send packets with
+		 * more than a single buffer when XDP conditions are
+		 * met. However it is not strictly illegal so the case
+		 * is handled as an exception and a warning is thrown.
+		 */
+		if (unlikely(num_buf > 1)) {
+			bpf_warn_invalid_xdp_buffer();
+			goto err_xdp;
+		}
+
+		/* Transient failure which in theory could occur if
+		 * in-flight packets from before XDP was enabled reach
+		 * the receive path after XDP is loaded. In practice I
+		 * was not able to create this condition.
+		 */
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+
+		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
-	struct sk_buff *curr_skb = head_skb;
+	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
@@ -423,6 +507,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
+err_xdp:
+	rcu_read_unlock();
 err_skb:
 	put_page(page);
 	while (--num_buf) {
@@ -1337,6 +1423,13 @@ static int virtnet_set_channels(struct net_device *dev,
 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
 		return -EINVAL;
 
+	/* For now we don't support modifying channels while XDP is loaded
+	 * also when XDP is loaded all RX queues have XDP programs so we only
+	 * need to check a single RX queue.
+	 */
+	if (vi->rq[0].xdp_prog)
+		return -EINVAL;
+
 	get_online_cpus();
 	err = virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
@@ -1428,6 +1521,70 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
 	.set_settings = virtnet_set_settings,
 };
 
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	int i;
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+		netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+		netdev_warn(dev, "XDP expects header/data in single page, any_header_sg required\n");
+		return -EINVAL;
+	}
+
+	if (dev->mtu > max_sz) {
+		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
+		return -EINVAL;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+	return 0;
+}
+
+static bool virtnet_xdp_query(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (vi->rq[i].xdp_prog)
+			return true;
+	}
+	return false;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return virtnet_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = virtnet_xdp_query(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
@@ -1444,6 +1601,7 @@ static const struct net_device_ops virtnet_netdev = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= virtnet_busy_poll,
 #endif
+	.ndo_xdp		= virtnet_xdp,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1505,12 +1663,20 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 
 static void free_receive_bufs(struct virtnet_info *vi)
 {
+	struct bpf_prog *old_prog;
 	int i;
 
+	rtnl_lock();
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+		if (old_prog)
+			bpf_prog_put(old_prog);
 	}
+	rtnl_unlock();
 }
 
 static void free_receive_page_frags(struct virtnet_info *vi)
--
cgit v1.2.3
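To exercise the receive-path hook this patch wires up, a trivial XDP object is enough. The sketch below is not part of the series; it assumes the usual clang -O2 -target bpf build, and the "prog" section name is what iproute2 looks for by default if the object is attached with something like ip link set dev <dev> xdp obj xdp_drop_kern.o.

	/* xdp_drop_kern.c - minimal test program: drop every frame on the
	 * virtio_net RX path, enough to confirm the new ndo_xdp hook and
	 * the LRO/MTU checks in virtnet_xdp_set() behave as described.
	 */
	#include <linux/bpf.h>

	#ifndef SEC
	#define SEC(name) __attribute__((section(name), used))
	#endif

	SEC("prog")
	int xdp_drop_all(struct xdp_md *ctx)
	{
		return XDP_DROP;
	}

	char _license[] SEC("license") = "GPL";

With this loaded, frames dropped by the program are accounted to the device's rx_dropped counter in this implementation, which makes the effect easy to observe from the guest.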
From 672aafd5d88a951f394334802b938b502010d9eb Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 15 Dec 2016 12:13:49 -0800
Subject: virtio_net: add dedicated XDP transmit queues

XDP requires using isolated transmit queues to avoid interference with
the normal networking stack (BQL, NETDEV_TX_BUSY, etc). This patch adds
an XDP queue per CPU when an XDP program is loaded and does not expose
the queues to the OS via the normal API call to
netif_set_real_num_tx_queues(). This way the stack will never push an
skb to these queues.

However the virtio/vhost/qemu implementation only allows for creating
TX/RX queue pairs at this time, so creating only TX queues was not
possible. And because the associated RX queues are being created anyway
I went ahead and exposed these to the stack and let the backend use
them. This creates more RX queues visible to the network stack than TX
queues, which is worth mentioning but does not cause any issues as far
as I can tell.

Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fdb6065b940..1d16862c5710 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -114,6 +114,9 @@ struct virtnet_info {
 	/* # of queue pairs currently used by the driver */
 	u16 curr_queue_pairs;
 
+	/* # of XDP queue pairs currently used by the driver */
+	u16 xdp_queue_pairs;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -1526,7 +1529,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct bpf_prog *old_prog;
-	int i;
+	u16 xdp_qp = 0, curr_qp;
+	int i, err;
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
@@ -1544,12 +1548,34 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return -EINVAL;
 	}
 
+	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
+	if (prog)
+		xdp_qp = nr_cpu_ids;
+
+	/* XDP requires extra queues for XDP_TX */
+	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+		netdev_warn(dev, "request %i queues but max is %i\n",
+			    curr_qp + xdp_qp, vi->max_queue_pairs);
+		return -ENOMEM;
+	}
+
+	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err) {
+		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+		return err;
+	}
+
 	if (prog) {
 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-		if (IS_ERR(prog))
+		if (IS_ERR(prog)) {
+			virtnet_set_queues(vi, curr_qp);
 			return PTR_ERR(prog);
+		}
 	}
 
+	vi->xdp_queue_pairs = xdp_qp;
+	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
--
cgit v1.2.3
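The queue accounting in virtnet_xdp_set() is compact, so here is the same arithmetic pulled out into a standalone helper. This is only an illustration of the check the patch adds (one extra TX queue per possible CPU on top of the queue pairs already in use, bounded by what the device advertised); the function name is made up and the snippet is not driver code.

	#include <linux/cpumask.h>	/* nr_cpu_ids */
	#include <linux/errno.h>

	/* Illustrative restatement of the capacity check in virtnet_xdp_set(). */
	static int xdp_queue_capacity_check(u16 curr_queue_pairs, u16 xdp_queue_pairs,
					    u16 max_queue_pairs, bool loading_prog)
	{
		u16 curr_qp = curr_queue_pairs - xdp_queue_pairs;	/* non-XDP pairs */
		u16 xdp_qp = loading_prog ? nr_cpu_ids : 0;		/* one TX queue per CPU */

		if (curr_qp + xdp_qp > max_queue_pairs)
			return -ENOMEM;	/* device cannot supply enough queue pairs */
		return 0;
	}

On a guest with more possible CPUs than spare virtio queue pairs, this is the check that makes program loading fail with -ENOMEM rather than letting XDP_TX share queues with the normal stack.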
From 56434a01b12e99eb60908f5f2b27b90726d0a183 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 15 Dec 2016 12:14:13 -0800
Subject: virtio_net: add XDP_TX support

This adds support for the XDP_TX action to virtio_net. When an XDP
program is run and returns the XDP_TX action, the virtio_net XDP
implementation will transmit the packet on a TX queue that aligns with
the current CPU the XDP packet was processed on.

Before sending the packet the header is zeroed. XDP is also expected to
handle checksums correctly, so no checksum offload support is provided.

Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 100 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1d16862c5710..ca3d2e2350c6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -330,12 +330,58 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void virtnet_xdp_xmit(struct virtnet_info *vi,
+			     struct receive_queue *rq,
+			     struct send_queue *sq,
+			     struct xdp_buff *xdp)
+{
+	struct page *page = virt_to_head_page(xdp->data);
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	unsigned int num_sg, len;
+	void *xdp_sent;
+	int err;
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		struct page *sent_page = virt_to_head_page(xdp_sent);
+
+		if (vi->mergeable_rx_bufs)
+			put_page(sent_page);
+		else
+			give_pages(rq, sent_page);
+	}
+
+	/* Zero header and leave csum up to XDP layers */
+	hdr = xdp->data;
+	memset(hdr, 0, vi->hdr_len);
+
+	num_sg = 1;
+	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
+				   xdp->data, GFP_ATOMIC);
+	if (unlikely(err)) {
+		if (vi->mergeable_rx_bufs)
+			put_page(page);
+		else
+			give_pages(rq, page);
+		return; // On error abort to avoid unnecessary kick
+	} else if (!vi->mergeable_rx_bufs) {
+		/* If not mergeable bufs must be big packets so cleanup pages */
+		give_pages(rq, (struct page *)page->private);
+		page->private = 0;
+	}
+
+	virtqueue_kick(sq->vq);
+}
+
 static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct receive_queue *rq,
 		       struct bpf_prog *xdp_prog,
 		       struct page *page, int offset, int len)
 {
 	int hdr_padded_len;
 	struct xdp_buff xdp;
+	unsigned int qp;
 	u32 act;
 	u8 *buf;
 
@@ -353,9 +399,15 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
 	switch (act) {
 	case XDP_PASS:
 		return XDP_PASS;
+	case XDP_TX:
+		qp = vi->curr_queue_pairs -
+			vi->xdp_queue_pairs +
+			smp_processor_id();
+		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
+		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+		return XDP_TX;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-	case XDP_TX:
 	case XDP_ABORTED:
 	case XDP_DROP:
 		return XDP_DROP;
@@ -390,9 +442,17 @@ static struct sk_buff *receive_big(struct net_device *dev,
 
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
-		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
-		if (act == XDP_DROP)
+		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -407,6 +467,7 @@ err_xdp:
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
+xdp_xmit:
 	return NULL;
 }
 
@@ -425,6 +486,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize;
 
+	head_skb = NULL;
+
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -448,9 +511,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
-		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
-		if (act == XDP_DROP)
+		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -528,6 +599,7 @@ err_skb:
 err_buf:
 	dev->stats.rx_dropped++;
 	dev_kfree_skb(head_skb);
+xdp_xmit:
 	return NULL;
 }
 
@@ -1713,6 +1785,16 @@ static void free_receive_page_frags(struct virtnet_info *vi)
 			put_page(vi->rq[i].alloc_frag.page);
 }
 
+static bool is_xdp_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1720,8 +1802,12 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
-			dev_kfree_skb(buf);
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+			if (!is_xdp_queue(vi, i))
+				dev_kfree_skb(buf);
+			else
+				put_page(virt_to_head_page(buf));
+		}
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
--
cgit v1.2.3
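A small program that returns XDP_TX is the easiest way to exercise the new transmit path. The sketch below swaps the Ethernet source and destination addresses so the frame is bounced back to its sender; it is an illustrative test program under the same clang -target bpf build assumption as the earlier example, not part of the series.

	/* xdp_tx_kern.c - reflect every frame back out the virtio_net device
	 * via the XDP_TX action added in this patch.
	 */
	#include <linux/bpf.h>
	#include <linux/if_ether.h>

	#ifndef SEC
	#define SEC(name) __attribute__((section(name), used))
	#endif

	SEC("prog")
	int xdp_tx_reflect(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;
		struct ethhdr *eth = data;
		unsigned char tmp;
		int i;

		/* Verifier-required bounds check before touching the header. */
		if (data + sizeof(*eth) > data_end)
			return XDP_PASS;

		/* Swap src/dst MAC so the reflected frame returns to the sender. */
		for (i = 0; i < ETH_ALEN; i++) {
			tmp = eth->h_dest[i];
			eth->h_dest[i] = eth->h_source[i];
			eth->h_source[i] = tmp;
		}

		return XDP_TX;
	}

	char _license[] SEC("license") = "GPL";

Because the driver zeroes the virtio-net header and provides no checksum offload on this path, the reflected frame goes out exactly as the program left it.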
From 72979a6c35907b6a7ab85e7bc60e0d52dba68f9d Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 15 Dec 2016 12:14:36 -0800
Subject: virtio_net: xdp, add slowpath case for non contiguous buffers

virtio_net XDP support expects receive buffers to be contiguous. If
this is not the case we enable a slowpath to allow connectivity to
continue but at a significant performance overhead associated with
linearizing the data. To make users painfully aware that XDP is running
in a degraded mode we throw an xdp buffer error.

To linearize packets we allocate a page and copy the segments of the
data, including the header, into it. After this the page can be handled
by the XDP code flow as normal. Depending on the return code the page
is then either freed or sent to the XDP xmit path.

There is no attempt to optimize this path. This case is being handled
simply as a precaution in case some unknown backend were to generate
packets in this form. To test this I had to hack qemu and force it to
generate these packets. I do not expect this case to be generated by
"real" backends.

Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 drivers/net/virtio_net.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ca3d2e2350c6..08327e005ccc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -471,6 +471,64 @@ xdp_xmit:
 	return NULL;
 }
 
+/* The conditions to enable XDP should preclude the underlying device from
+ * sending packets across multiple buffers (num_buf > 1). However per spec
+ * it does not appear to be illegal to do so but rather just against convention.
+ * So in order to avoid making a system unresponsive the packets are pushed
+ * into a page and the XDP program is run. This will be extremely slow and we
+ * push a warning to the user to fix this as soon as possible. Fixing this may
+ * require resolving the underlying hardware to determine why multiple buffers
+ * are being received or simply loading the XDP program in the ingress stack
+ * after the skb is built because there is no advantage to running it here
+ * anymore.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+				       u16 num_buf,
+				       struct page *p,
+				       int offset,
+				       unsigned int *len)
+{
+	struct page *page = alloc_page(GFP_ATOMIC);
+	unsigned int page_off = 0;
+
+	if (!page)
+		return NULL;
+
+	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	page_off += *len;
+
+	while (--num_buf) {
+		unsigned int buflen;
+		unsigned long ctx;
+		void *buf;
+		int off;
+
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!ctx))
+			goto err_buf;
+
+		/* guard against a misconfigured or uncooperative backend that
+		 * is sending packet larger than the MTU.
+		 */
+		if ((page_off + buflen) > PAGE_SIZE)
+			goto err_buf;
+
+		buf = mergeable_ctx_to_buf_address(ctx);
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
+		memcpy(page_address(page) + page_off,
+		       page_address(p) + off, buflen);
+		page_off += buflen;
+	}
+
+	*len = page_off;
+	return page;
+err_buf:
+	__free_pages(page, 0);
+	return NULL;
+}
+
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
@@ -491,6 +549,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
+		struct page *xdp_page;
 		u32 act;
 
 		/* No known backend devices should send packets with
@@ -500,7 +559,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		 */
 		if (unlikely(num_buf > 1)) {
 			bpf_warn_invalid_xdp_buffer();
-			goto err_xdp;
+
+			/* linearize data for XDP */
+			xdp_page = xdp_linearize_page(rq, num_buf,
+						      page, offset, &len);
+			if (!xdp_page)
+				goto err_xdp;
+			offset = 0;
+		} else {
+			xdp_page = page;
 		}
 
 		/* Transient failure which in theory could occur if
@@ -514,12 +581,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
 		switch (act) {
 		case XDP_PASS:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
 			break;
 		case XDP_TX:
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_DROP:
 		default:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
 			goto err_xdp;
 		}
 	}
--
cgit v1.2.3
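For readers skimming the diff, the slow path boils down to a copy-and-bound-check loop: append each segment into one freshly allocated page and give up the moment the total would exceed a single page. The snippet below is a stripped-down user-space restatement of that loop, shown only to make the invariant obvious; it is not driver code and the names are made up.

	#include <stdbool.h>
	#include <stddef.h>
	#include <string.h>

	#define XDP_LINEARIZE_LIMIT 4096	/* stands in for PAGE_SIZE */

	/* Copy nr_segs buffers into dst, failing if they cannot all fit in
	 * one page-sized destination, mirroring xdp_linearize_page().
	 */
	static bool linearize_segments(unsigned char *dst, size_t *dst_len,
				       const unsigned char **segs,
				       const size_t *seg_lens, int nr_segs)
	{
		size_t off = 0;
		int i;

		for (i = 0; i < nr_segs; i++) {
			if (off + seg_lens[i] > XDP_LINEARIZE_LIMIT)
				return false;	/* oversized frame: caller drops it */
			memcpy(dst + off, segs[i], seg_lens[i]);
			off += seg_lens[i];
		}
		*dst_len = off;
		return true;
	}

The bound is what keeps a misbehaving backend from ever handing XDP a frame larger than the page-sized MTU limit enforced when the program was loaded.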