diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 03:39:49 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 03:39:49 +0200 |
commit | 1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch) | |
tree | dcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /drivers/net/virtio_net.c | |
parent | Merge tag 'overflow-v4.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff) | |
parent | strparser: Add __strp_unpause and use it in ktls. (diff) | |
download | linux-1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21.tar.xz linux-1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add Maglev hashing scheduler to IPVS, from Inju Song.
2) Lots of new TC subsystem tests from Roman Mashak.
3) Add TCP zero copy receive and fix delayed acks and autotuning with
SO_RCVLOWAT, from Eric Dumazet.
4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard
Brouer.
5) Add ttl inherit support to vxlan, from Hangbin Liu.
6) Properly separate ipv6 routes into their logically independent
components. fib6_info for the routing table, and fib6_nh for sets of
nexthops, which thus can be shared. From David Ahern.
7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP
messages from XDP programs. From Nikita V. Shirokov.
8) Lots of long overdue cleanups to the r8169 driver, from Heiner
Kallweit.
9) Add BTF ("BPF Type Format"), from Martin KaFai Lau.
10) Add traffic condition monitoring to iwlwifi, from Luca Coelho.
11) Plumb extack down into fib_rules, from Roopa Prabhu.
12) Add Flower classifier offload support to igb, from Vinicius Costa
Gomes.
13) Add UDP GSO support, from Willem de Bruijn.
14) Add documentation for eBPF helpers, from Quentin Monnet.
15) Add TLS tx offload to mlx5, from Ilya Lesokhin.
16) Allow applications to be given the number of bytes available to read
on a socket via a control message returned from recvmsg(), from
Soheil Hassas Yeganeh.
17) Add x86_32 eBPF JIT compiler, from Wang YanQing.
18) Add AF_XDP sockets, with zerocopy support infrastructure as well.
From Björn Töpel.
19) Remove indirect load support from all of the BPF JITs and handle
these operations in the verifier by translating them into native BPF
instead. From Daniel Borkmann.
20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha.
21) Allow XDP programs to do lookups in the main kernel routing tables
for forwarding. From David Ahern.
22) Allow drivers to store hardware state into an ELF section of kernel
dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy.
23) Various RACK and loss detection improvements in TCP, from Yuchung
Cheng.
24) Add TCP SACK compression, from Eric Dumazet.
25) Add User Mode Helper support and basic bpfilter infrastructure, from
Alexei Starovoitov.
26) Support ports and protocol values in RTM_GETROUTE, from Roopa
Prabhu.
27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard
Brouer.
28) Add lots of forwarding selftests, from Petr Machata.
29) Add generic network device failover driver, from Sridhar Samudrala.
* ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits)
strparser: Add __strp_unpause and use it in ktls.
rxrpc: Fix terminal retransmission connection ID to include the channel
net: hns3: Optimize PF CMDQ interrupt switching process
net: hns3: Fix for VF mailbox receiving unknown message
net: hns3: Fix for VF mailbox cannot receiving PF response
bnx2x: use the right constant
Revert "net: sched: cls: Fix offloading when ingress dev is vxlan"
net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
enic: fix UDP rss bits
netdev-FAQ: clarify DaveM's position for stable backports
rtnetlink: validate attributes in do_setlink()
mlxsw: Add extack messages for port_{un, }split failures
netdevsim: Add extack error message for devlink reload
devlink: Add extack to reload and port_{un, }split operations
net: metrics: add proper netlink validation
ipmr: fix error path when ipmr_new_table fails
ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds
net: hns3: remove unused hclgevf_cfg_func_mta_filter
netfilter: provide udp*_lib_lookup for nf_tproxy
qed*: Utilize FW 8.37.2.0
...
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 182 |
1 files changed, 131 insertions, 51 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 032e1ac10a30..2aaa18ec7d46 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -30,8 +30,11 @@ #include <linux/cpu.h> #include <linux/average.h> #include <linux/filter.h> +#include <linux/netdevice.h> +#include <linux/pci.h> #include <net/route.h> #include <net/xdp.h> +#include <net/net_failover.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -210,6 +213,9 @@ struct virtnet_info { u32 speed; unsigned long guest_offloads; + + /* failover when STANDBY feature enabled */ + struct failover *failover; }; struct padded_vnet_hdr { @@ -407,58 +413,73 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, return skb; } -static void virtnet_xdp_flush(struct net_device *dev) +static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, + struct send_queue *sq, + struct xdp_frame *xdpf) { - struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq; - unsigned int qp; + struct virtio_net_hdr_mrg_rxbuf *hdr; + int err; - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - sq = &vi->sq[qp]; + /* virtqueue want to use data area in-front of packet */ + if (unlikely(xdpf->metasize > 0)) + return -EOPNOTSUPP; - virtqueue_kick(sq->vq); + if (unlikely(xdpf->headroom < vi->hdr_len)) + return -EOVERFLOW; + + /* Make room for virtqueue hdr (also change xdpf->headroom?) 
*/ + xdpf->data -= vi->hdr_len; + /* Zero header and leave csum up to XDP layers */ + hdr = xdpf->data; + memset(hdr, 0, vi->hdr_len); + xdpf->len += vi->hdr_len; + + sg_init_one(sq->sg, xdpf->data, xdpf->len); + + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + if (unlikely(err)) + return -ENOSPC; /* Caller handle free/refcnt */ + + return 0; } -static bool __virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_buff *xdp) +static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi, + struct xdp_frame *xdpf) { - struct virtio_net_hdr_mrg_rxbuf *hdr; - unsigned int len; + struct xdp_frame *xdpf_sent; struct send_queue *sq; + unsigned int len; unsigned int qp; - void *xdp_sent; - int err; qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); sq = &vi->sq[qp]; /* Free up any pending old buffers before queueing new ones. */ - while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) { - struct page *sent_page = virt_to_head_page(xdp_sent); - - put_page(sent_page); - } - - xdp->data -= vi->hdr_len; - /* Zero header and leave csum up to XDP layers */ - hdr = xdp->data; - memset(hdr, 0, vi->hdr_len); - - sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data); - - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC); - if (unlikely(err)) - return false; /* Caller handle free/refcnt */ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); - return true; + return __virtnet_xdp_xmit_one(vi, sq, xdpf); } -static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int virtnet_xdp_xmit(struct net_device *dev, + int n, struct xdp_frame **frames, u32 flags) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; + struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; - bool sent; + struct send_queue *sq; + unsigned int len; + unsigned int qp; + int drops = 0; + int err; + int i; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return 
-EINVAL; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. @@ -467,10 +488,24 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (!xdp_prog) return -ENXIO; - sent = __virtnet_xdp_xmit(vi, xdp); - if (!sent) - return -ENOSPC; - return 0; + /* Free up any pending old buffers before queueing new ones. */ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + + err = __virtnet_xdp_xmit_one(vi, sq, xdpf); + if (err) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (flags & XDP_XMIT_FLUSH) + virtqueue_kick(sq->vq); + + return n - drops; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) @@ -559,7 +594,6 @@ static struct sk_buff *receive_small(struct net_device *dev, struct page *page = virt_to_head_page(buf); unsigned int delta = 0; struct page *xdp_page; - bool sent; int err; len -= vi->hdr_len; @@ -568,6 +602,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; + struct xdp_frame *xdpf; struct xdp_buff xdp; void *orig_data; u32 act; @@ -608,10 +643,14 @@ static struct sk_buff *receive_small(struct net_device *dev, case XDP_PASS: /* Recalculate length in case bpf program changed it */ delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); - if (unlikely(!sent)) { + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + err = __virtnet_xdp_tx_xmit(vi, xdpf); + if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } @@ -641,7 +680,7 @@ static struct sk_buff *receive_small(struct net_device *dev, goto err; 
} skb_reserve(skb, headroom - delta); - skb_put(skb, len + delta); + skb_put(skb, len); if (!delta) { buf += header_offset; memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); @@ -694,7 +733,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - bool sent; int err; head_skb = NULL; @@ -702,6 +740,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { + struct xdp_frame *xdpf; struct page *xdp_page; struct xdp_buff xdp; void *data; @@ -755,6 +794,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, offset = xdp.data - page_address(xdp_page) - vi->hdr_len; + /* recalculate len if xdp.data or xdp.data_end were + * adjusted + */ + len = xdp.data_end - xdp.data + vi->hdr_len; /* We can only create skb based on xdp_page. */ if (unlikely(xdp_page != page)) { rcu_read_unlock(); @@ -765,8 +808,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); - if (unlikely(!sent)) { + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + err = __virtnet_xdp_tx_xmit(vi, xdpf); + if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) put_page(xdp_page); @@ -1311,6 +1357,13 @@ static int virtnet_open(struct net_device *dev) if (err < 0) return err; + err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err < 0) { + xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); + return err; + } + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } @@ -1502,6 +1555,9 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct sockaddr *addr; struct scatterlist sg; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) + return 
-EOPNOTSUPP; + addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; @@ -2285,6 +2341,22 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, + size_t len) +{ + struct virtnet_info *vi = netdev_priv(dev); + int ret; + + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) + return -EOPNOTSUPP; + + ret = snprintf(buf, len, "sby"); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -2300,8 +2372,8 @@ static const struct net_device_ops virtnet_netdev = { #endif .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, - .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, + .ndo_get_phys_port_name = virtnet_get_phys_port_name, }; static void virtnet_config_changed_work(struct work_struct *work) @@ -2531,12 +2603,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) vi->sq[i].vq = vqs[txq2vq(i)]; } - kfree(names); - kfree(callbacks); - kfree(vqs); - kfree(ctx); + /* run here: ret == 0. 
*/ - return 0; err_find: kfree(ctx); @@ -2855,10 +2923,18 @@ static int virtnet_probe(struct virtio_device *vdev) virtnet_init_settings(dev); + if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { + vi->failover = net_failover_create(vi->dev); + if (IS_ERR(vi->failover)) { + err = PTR_ERR(vi->failover); + goto free_vqs; + } + } + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); - goto free_vqs; + goto free_failover; } virtio_device_ready(vdev); @@ -2895,6 +2971,8 @@ free_unregister_netdev: vi->vdev->config->reset(vdev); unregister_netdev(dev); +free_failover: + net_failover_destroy(vi->failover); free_vqs: cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); @@ -2929,6 +3007,8 @@ static void virtnet_remove(struct virtio_device *vdev) unregister_netdev(vi->dev); + net_failover_destroy(vi->failover); + remove_vq_common(vi); free_netdev(vi->dev); @@ -2978,7 +3058,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ - VIRTIO_NET_F_SPEED_DUPLEX + VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY static unsigned int features[] = { VIRTNET_FEATURES, |