diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-10 20:33:08 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-10 20:33:08 +0200 |
commit | c44b59430393c38873fd933333d945f426857a59 (patch) | |
tree | 1af82d499fb4abb67487962f8d03f4088b367f37 | |
parent | Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff) | |
parent | s390/virtio: change maintainership (diff) | |
download | linux-c44b59430393c38873fd933333d945f426857a59.tar.xz linux-c44b59430393c38873fd933333d945f426857a59.zip |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
"Fixes, cleanups, performance
A bunch of changes to virtio, most affecting virtio net. Also ptr_ring
batched zeroing - first of batching enhancements that seems ready."
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
s390/virtio: change maintainership
tools/virtio: fix spelling mistake: "wakeus" -> "wakeups"
virtio_net: tidy a couple debug statements
ptr_ring: support testing different batching sizes
ringtest: support test specific parameters
ptr_ring: batch ring zeroing
virtio: virtio_driver doc
virtio_net: don't reset twice on XDP on/off
virtio_net: fix support for small rings
virtio_net: reduce alignment for buffers
virtio_net: rework mergeable buffer handling
virtio_net: allow specifying context for rx
virtio: allow extra context per descriptor
tools/virtio: fix build breakage
virtio: add context flag to find vqs
virtio: wrap find_vqs
ringtest: fix an assert statement
32 files changed, 330 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2decf40d28e1..66b1e1a2d203 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13556,8 +13556,8 @@ F: include/uapi/linux/virtio_*.h F: drivers/crypto/virtio/ VIRTIO DRIVERS FOR S390 -M: Christian Borntraeger <borntraeger@de.ibm.com> M: Cornelia Huck <cornelia.huck@de.ibm.com> +M: Halil Pasic <pasic@linux.vnet.ibm.com> L: linux-s390@vger.kernel.org L: virtualization@lists.linux-foundation.org L: kvm@vger.kernel.org diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 94173de1efaa..553cc4c542b4 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -452,8 +452,7 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - &desc); + err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) goto out; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 7d041d026680..ad843eb02ae7 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1945,9 +1945,9 @@ static int init_vqs(struct ports_device *portdev) } } /* Find the queues. */ - err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, - io_callbacks, - (const char **)io_names, NULL); + err = virtio_find_vqs(portdev->vdev, nr_queues, vqs, + io_callbacks, + (const char **)io_names, NULL); if (err) goto free; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 21472e427f6f..a111cd72797b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -119,8 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) names[i] = vi->data_vq[i].name; } - ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names, NULL); + ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL); if (ret) goto err_find; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 491866865c33..1e1c90b30d4a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -175,8 +175,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) DRM_INFO("virgl 3d acceleration not supported by guest\n"); #endif - ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, - callbacks, names, NULL); + ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); goto err_vqs; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index c2e29d7f0de8..a341938c7e2c 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -278,7 +278,7 @@ static void vop_del_vqs(struct virtio_device *dev) static struct virtqueue *vop_find_vq(struct virtio_device *dev, unsigned index, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, bool ctx) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; @@ -314,6 +314,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN, dev, false, + ctx, (void __force *)va, vop_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -374,7 +375,8 @@ unmap: static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc) + const char * const names[], const bool *ctx, + struct irq_affinity *desc) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; @@ -388,7 +390,8 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, for (i = 0; i < nvqs; ++i) { dev_dbg(_vop_dev(vdev), "%s: %d: %s\n", __func__, i, names[i]); - vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]); + vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index bc0eb47eccee..6122768c8644 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -679,8 +679,7 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the TX virtio ring. This is a "guest side vring". */ - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, - NULL); + err = virtio_find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, NULL); if (err) goto err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1c6d3923c224..9320d96a1632 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -29,6 +29,7 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/average.h> +#include <net/route.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -54,17 +55,6 @@ module_param(napi_tx, bool, 0644); */ DECLARE_EWMA(pkt_len, 0, 64) -/* With mergeable buffers we align buffer address and use the low bits to - * encode its true size. Buffer size is up to 1 page so we need to align to - * square root of page size to ensure we reserve enough bits to encode the true - * size. - */ -#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2) - -/* Minimum alignment for mergeable packet buffers. */ -#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \ - 1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT) - #define VIRTNET_DRIVER_VERSION "1.0.0" struct virtnet_stats { @@ -112,6 +102,9 @@ struct receive_queue { /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; + /* Min single buffer size for mergeable buffers case. */ + unsigned int min_buf_len; + /* Name of this receive queue: input.$index */ char name[40]; }; @@ -277,24 +270,6 @@ static void skb_xmit_done(struct virtqueue *vq) netif_wake_subqueue(vi->dev, vq2txq(vq)); } -static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx) -{ - unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1); - return (truesize + 1) * MERGEABLE_BUFFER_ALIGN; -} - -static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx) -{ - return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN); - -} - -static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize) -{ - unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN; - return (unsigned long)buf | (size - 1); -} - /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, @@ -538,15 +513,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, while (--*num_buf) { unsigned int buflen; - unsigned long ctx; void *buf; int off; - ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen); - if (unlikely(!ctx)) + buf = virtqueue_get_buf(rq->vq, &buflen); + if (unlikely(!buf)) goto err_buf; - buf = mergeable_ctx_to_buf_address(ctx); p = virt_to_head_page(buf); off = buf - page_address(p); @@ -575,10 +548,10 @@ err_buf: static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, - unsigned long ctx, + void *buf, + void *ctx, unsigned int len) { - void *buf = mergeable_ctx_to_buf_address(ctx); struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); @@ -666,7 +639,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } rcu_read_unlock(); - truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); + if (unlikely(len > (unsigned long)ctx)) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)ctx); + dev->stats.rx_length_errors++; + goto err_skb; + } + truesize = (unsigned long)ctx; head_skb = page_to_skb(vi, rq, page, offset, len, truesize); curr_skb = head_skb; @@ -675,7 +654,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, while (--num_buf) { int num_skb_frags; - ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len); + buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); if (unlikely(!ctx)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, @@ -685,8 +664,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_buf; } - buf = mergeable_ctx_to_buf_address(ctx); page = virt_to_head_page(buf); + if (unlikely(len > (unsigned long)ctx)) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)ctx); + dev->stats.rx_length_errors++; + goto err_skb; + } + truesize = (unsigned long)ctx; num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { @@ -702,7 +687,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, head_skb->truesize += nskb->truesize; num_skb_frags = 0; } - truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; @@ -727,14 +711,14 @@ err_xdp: err_skb: put_page(page); while (--num_buf) { - ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len); - if (unlikely(!ctx)) { + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); dev->stats.rx_length_errors++; break; } - page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx)); + page = virt_to_head_page(buf); put_page(page); } err_buf: @@ -745,7 +729,7 @@ xdp_xmit: } static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len) + void *buf, unsigned int len, void **ctx) { struct net_device *dev = vi->dev; struct sk_buff *skb; @@ -756,9 +740,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; if (vi->mergeable_rx_bufs) { - unsigned long ctx = (unsigned long)buf; - void *base = mergeable_ctx_to_buf_address(ctx); - put_page(virt_to_head_page(base)); + put_page(virt_to_head_page(buf)); } else if (vi->big_packets) { give_pages(rq, buf); } else { @@ -768,7 +750,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len); + skb = receive_mergeable(dev, vi, rq, buf, ctx, len); else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len); else @@ -880,14 +862,15 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, return err; } -static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len) +static unsigned int get_mergeable_buf_len(struct receive_queue *rq, + struct ewma_pkt_len *avg_pkt_len) { const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); unsigned int len; len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), - GOOD_PACKET_LEN, PAGE_SIZE - hdr_len); - return ALIGN(len, MERGEABLE_BUFFER_ALIGN); + rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len); + return ALIGN(len, L1_CACHE_BYTES); } static int add_recvbuf_mergeable(struct virtnet_info *vi, @@ -896,17 +879,17 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); char *buf; - unsigned long ctx; + void *ctx; int err; unsigned int len, hole; - len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len); + len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len); if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; buf += headroom; /* advance address leaving hole at front of pkt */ - ctx = mergeable_buf_to_ctx(buf, len); + ctx = (void *)(unsigned long)len; get_page(alloc_frag->page); alloc_frag->offset += len + headroom; hole = alloc_frag->size - alloc_frag->offset; @@ -921,7 +904,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, } sg_init_one(rq->sg, buf, len); - err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp); + err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); @@ -1032,10 +1015,20 @@ static int virtnet_receive(struct receive_queue *rq, int budget) void *buf; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); - while (received < budget && - (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { - bytes += receive_buf(vi, rq, buf, len); - received++; + if (vi->mergeable_rx_bufs) { + void *ctx; + + while (received < budget && + (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { + bytes += receive_buf(vi, rq, buf, len, ctx); + received++; + } + } else { + while (received < budget && + (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { + bytes += receive_buf(vi, rq, buf, len, NULL); + received++; + } } if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { @@ -1854,7 +1847,6 @@ static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp) virtnet_freeze_down(dev); _remove_vq_common(vi); - dev->config->reset(dev); virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); @@ -2118,9 +2110,7 @@ static void free_unused_bufs(struct virtnet_info *vi) while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { if (vi->mergeable_rx_bufs) { - unsigned long ctx = (unsigned long)buf; - void *base = mergeable_ctx_to_buf_address(ctx); - put_page(virt_to_head_page(base)); + put_page(virt_to_head_page(buf)); } else if (vi->big_packets) { give_pages(&vi->rq[i], buf); } else { @@ -2141,6 +2131,21 @@ static void virtnet_del_vqs(struct virtnet_info *vi) virtnet_free_queues(vi); } +/* How large should a single buffer be so a queue full of these can fit at + * least one full packet? + * Logic below assumes the mergeable buffer header is used. + */ +static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) +{ + const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + unsigned int rq_size = virtqueue_get_vring_size(vq); + unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; + unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; + unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); + + return max(min_buf_len, hdr_len); +} + static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; @@ -2148,6 +2153,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; + bool *ctx; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by @@ -2166,6 +2172,13 @@ static int virtnet_find_vqs(struct virtnet_info *vi) names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); if (!names) goto err_names; + if (vi->mergeable_rx_bufs) { + ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL); + if (!ctx) + goto err_ctx; + } else { + ctx = NULL; + } /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { @@ -2181,10 +2194,12 @@ static int virtnet_find_vqs(struct virtnet_info *vi) sprintf(vi->sq[i].name, "output.%d", i); names[rxq2vq(i)] = vi->rq[i].name; names[txq2vq(i)] = vi->sq[i].name; + if (ctx) + ctx[rxq2vq(i)] = true; } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names, NULL); + names, ctx, NULL); if (ret) goto err_find; @@ -2196,6 +2211,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].vq = vqs[rxq2vq(i)]; + vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); vi->sq[i].vq = vqs[txq2vq(i)]; } @@ -2206,6 +2222,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) return 0; err_find: + kfree(ctx); +err_ctx: kfree(names); err_names: kfree(callbacks); @@ -2282,7 +2300,8 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, BUG_ON(queue_index >= vi->max_queue_pairs); avg = &vi->rq[queue_index].mrg_avg_pkt_len; - return sprintf(buf, "%u\n", get_mergeable_buf_len(avg)); + return sprintf(buf, "%u\n", + get_mergeable_buf_len(&vi->rq[queue_index], avg)); } static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 0142cc3f0c91..294634836b32 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(rproc_vq_interrupt); static struct virtqueue *rp_find_vq(struct virtio_device *vdev, unsigned int id, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, bool ctx) { struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); struct rproc *rproc = vdev_to_rproc(vdev); @@ -103,8 +103,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, * Create the new vq, and tell virtio we're not interested in * the 'weak' smp barriers, since we're talking with a real device. */ - vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr, - rproc_virtio_notify, callback, name); + vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx, + addr, rproc_virtio_notify, callback, name); if (!vq) { dev_err(dev, "vring_new_virtqueue %s failed\n", name); rproc_free_vring(rvring); @@ -138,12 +138,14 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + const bool * ctx, struct irq_affinity *desc) { int i, ret; for (i = 0; i < nvqs; ++i) { - vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i]); + vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { ret = PTR_ERR(vqs[i]); goto error; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 5e66e081027e..f7cade09d38a 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev) init_waitqueue_head(&vrp->sendq); /* We expect two virtqueues, rx and tx (and in this order) */ - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); + err = virtio_find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); if (err) goto free_vrp; diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 2ce0b3eb2efe..a99d09a11f05 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -189,7 +189,7 @@ static bool kvm_notify(struct virtqueue *vq) static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, bool ctx) { struct kvm_device *kdev = to_kvmdev(vdev); struct kvm_vqconfig *config; @@ -211,7 +211,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, goto out; vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN, - vdev, true, (void *) config->address, + vdev, true, ctx, (void *) config->address, kvm_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -256,6 +256,7 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + const bool *ctx, struct irq_affinity *desc) { struct kvm_device *kdev = to_kvmdev(vdev); @@ -266,7 +267,8 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, return -ENOENT; for (i = 0; i < nvqs; ++i) { - vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]); + vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) goto error; } diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 0ed209f3d8b0..2a76ea78a0bf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -484,7 +484,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev) static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, int i, vq_callback_t *callback, - const char *name, + const char *name, bool ctx, struct ccw1 *ccw) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); @@ -522,7 +522,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, } vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev, - true, info->queue, virtio_ccw_kvm_notify, + true, ctx, info->queue, virtio_ccw_kvm_notify, callback, name); if (!vq) { /* For now, we fail if we can't get the requested size. */ @@ -629,6 +629,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + const bool *ctx, struct irq_affinity *desc) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); @@ -642,7 +643,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, for (i = 0; i < nvqs; ++i) { vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i], - ccw); + ctx ? ctx[i] : false, ccw); if (IS_ERR(vqs[i])) { ret = PTR_ERR(vqs[i]); vqs[i] = NULL; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index a29d068b7696..f8dbfeee6c63 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -894,8 +894,7 @@ static int virtscsi_init(struct virtio_device *vdev, } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - &desc); + err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 34adf9b9c053..408c174ef0d5 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -418,8 +418,7 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, - NULL); + err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); if (err) return err; diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 79f1293cda93..3a0468f2ceb0 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, - NULL); + err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 78343b8f9034..74dc7170fd35 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -351,7 +351,7 @@ static void vm_del_vqs(struct virtio_device *vdev) static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, bool ctx) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_vq_info *info; @@ -388,7 +388,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, /* Create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, - true, true, vm_notify, callback, name); + true, true, ctx, vm_notify, callback, name); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; @@ -447,6 +447,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + const bool *ctx, struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); @@ -459,7 +460,8 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, return err; for (i = 0; i < nvqs; ++i) { - vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]); + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { vm_del_vqs(vdev); return PTR_ERR(vqs[i]); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 698d5d06fa03..007a4f366086 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -172,6 +172,7 @@ error: static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -183,7 +184,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, if (!info) return ERR_PTR(-ENOMEM); - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, + vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; @@ -274,6 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], bool per_vq_vectors, + const bool *ctx, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -315,6 +317,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, else msix_vec = VP_MSIX_VQ_VECTOR; vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false, msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -345,7 +348,7 @@ error_find: static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], const bool *ctx) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; @@ -367,6 +370,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, continue; } vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -383,20 +387,21 @@ out_del_vqs: /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc) + const char * const names[], const bool *ctx, + struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); if (!err) return 0; /* Finally fall back to regular interrupts. */ - return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); + return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); } const char *vp_bus_name(struct virtio_device *vdev) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index e96334aec1e0..135ee3cf7175 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -102,6 +102,7 @@ struct virtio_pci_device { unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec); void (*del_vq)(struct virtio_pci_vq_info *info); @@ -131,7 +132,8 @@ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc); + const char * const names[], const bool *ctx, + struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 4bfa48fb1324..2780886e8ba3 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -116,6 +116,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtqueue *vq; @@ -135,7 +136,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, - true, false, vp_notify, callback, name); + true, false, ctx, + vp_notify, callback, name); if (!vq) return ERR_PTR(-ENOMEM); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 8978f109d2d7..2555d80f6eec 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -297,6 +297,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common; @@ -328,7 +329,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, - true, true, vp_notify, callback, name); + true, true, ctx, + vp_notify, callback, name); if (!vq) return ERR_PTR(-ENOMEM); @@ -387,12 +389,14 @@ err_map_notify: } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc) + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); if (rc) return rc; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 409aeaa49246..5e1b548828e6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -263,6 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, unsigned int out_sgs, unsigned int in_sgs, void *data, + void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -275,6 +276,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, START_USE(vq); BUG_ON(data == NULL); + BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); @@ -389,6 +391,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, vq->desc_state[head].data = data; if (indirect) vq->desc_state[head].indir_desc = desc; + if (ctx) + vq->desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -461,7 +465,8 @@ int virtqueue_add_sgs(struct virtqueue *_vq, for (sg = sgs[i]; sg; sg = sg_next(sg)) total_sg++; } - return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); + return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, + data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); @@ -483,7 +488,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 1, 0, data, gfp); + return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); @@ -505,11 +510,35 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 0, 1, data, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); /** + * virtqueue_add_inbuf_ctx - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @ctx: extra context for the token + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); + +/** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @vq: the struct virtqueue * @@ -598,7 +627,8 @@ bool virtqueue_kick(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_kick); -static void detach_buf(struct vring_virtqueue *vq, unsigned int head) +static void detach_buf(struct vring_virtqueue *vq, unsigned int head, + void **ctx) { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); @@ -622,10 +652,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) /* Plus final descriptor */ vq->vq.num_free++; - /* Free the indirect table, if any, now that it's unmapped. */ - if (vq->desc_state[head].indir_desc) { + if (vq->indirect) { struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; - u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); + u32 len; + + /* Free the indirect table, if any, now that it's unmapped. */ + if (!indir_desc) + return; + + len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); BUG_ON(!(vq->vring.desc[head].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); @@ -634,8 +669,10 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) for (j = 0; j < len / sizeof(struct vring_desc); j++) vring_unmap_one(vq, &indir_desc[j]); - kfree(vq->desc_state[head].indir_desc); + kfree(indir_desc); vq->desc_state[head].indir_desc = NULL; + } else if (ctx) { + *ctx = vq->desc_state[head].indir_desc; } } @@ -660,7 +697,8 @@ static inline bool more_used(const struct vring_virtqueue *vq) * Returns NULL if there are no used buffers, or the "data" token * handed to virtqueue_add_*(). */ -void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, + void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; @@ -698,7 +736,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) /* detach_buf clears data, so grab it now. */ ret = vq->desc_state[i].data; - detach_buf(vq, i); + detach_buf(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -715,8 +753,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) END_USE(vq); return ret; } -EXPORT_SYMBOL_GPL(virtqueue_get_buf); +EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); +void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +{ + return virtqueue_get_buf_ctx(_vq, len, NULL); +} +EXPORT_SYMBOL_GPL(virtqueue_get_buf); /** * virtqueue_disable_cb - disable callbacks * @vq: the struct virtqueue we're talking about. @@ -878,7 +921,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) continue; /* detach_buf clears data, so grab it now. */ buf = vq->desc_state[i].data; - detach_buf(vq, i); + detach_buf(vq, i, NULL); vq->avail_idx_shadow--; vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); END_USE(vq); @@ -916,6 +959,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, bool weak_barriers, + bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) @@ -950,7 +994,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_add_time_valid = false; #endif - vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && + !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ @@ -1019,6 +1064,7 @@ struct virtqueue *vring_create_virtqueue( struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, + bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) @@ -1058,7 +1104,7 @@ struct virtqueue *vring_create_virtqueue( queue_size_in_bytes = vring_size(num, vring_align); vring_init(&vring, num, queue, vring_align); - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, + vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, notify, callback, name); if (!vq) { vring_free_queue(vdev, queue_size_in_bytes, queue, @@ -1079,6 +1125,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool context, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), @@ -1086,7 +1133,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, { struct vring vring; vring_init(&vring, num, pages, vring_align); - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, notify, callback, name); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6c70444da3b9..6b2e0dd88569 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -34,11 +34,13 @@ struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; - int consumer ____cacheline_aligned_in_smp; + int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ + int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ + int batch; /* number of entries to consume in a batch */ void **queue; }; @@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) - return r->queue[r->consumer]; + return r->queue[r->consumer_head]; return NULL; } @@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { - r->queue[r->consumer++] = NULL; - if (unlikely(r->consumer >= r->size)) - r->consumer = 0; + /* Fundamentally, what we want to do is update consumer + * index and zero out the entry so producer can reuse it. + * Doing it naively at each consume would be as simple as: + * r->queue[r->consumer++] = NULL; + * if (unlikely(r->consumer >= r->size)) + * r->consumer = 0; + * but that is suboptimal when the ring is full as producer is writing + * out new entries in the same cache line. Defer these updates until a + * batch of entries has been consumed. + */ + int head = r->consumer_head++; + + /* Once we have processed enough entries invalidate them in + * the ring all at once so producer can reuse their space in the ring. + * We also do this when we reach end of the ring - not mandatory + * but helps keep the implementation simple. + */ + if (unlikely(r->consumer_head - r->consumer_tail >= r->batch || + r->consumer_head >= r->size)) { + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + } + if (unlikely(r->consumer_head >= r->size)) { + r->consumer_head = 0; + r->consumer_tail = 0; + } } static inline void *__ptr_ring_consume(struct ptr_ring *r) @@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); } +static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) +{ + r->size = size; + r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); + /* We need to set batch at least to 1 to make logic + * in __ptr_ring_discard_one work correctly. + * Batching too much (because ring is small) would cause a lot of + * burstiness. Needs tuning, for now disable batching. + */ + if (r->batch > r->size / 2 || !r->batch) + r->batch = 1; +} + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; - r->size = size; - r->producer = r->consumer = 0; + __ptr_ring_set_size(r, size); + r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); @@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, else if (destroy) destroy(ptr); - r->size = size; + __ptr_ring_set_size(r, size); r->producer = producer; - r->consumer = 0; + r->consumer_head = 0; + r->consumer_tail = 0; old = r->queue; r->queue = queue; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 7edfbdb55a99..28b0e965360f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + void *ctx, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, @@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); +void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, + void **ctx); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); @@ -156,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev); * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. * @probe: the function to call when a device is found. Returns 0 or -errno. + * @scan: optional function to call after successful probe; intended + * for virtio-scsi to invoke a scan. * @remove: the function to call when a device is removed. * @config_changed: optional function to call when the device configuration * changes; may be called in interrupt context. + * @freeze: optional function to call during suspend/hibernation. + * @restore: optional function to call on resume. */ struct virtio_driver { struct device_driver driver; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8355bab175e1..0133d8a12ccd 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -72,7 +72,8 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc); + const char * const names[], const bool *ctx, + struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -173,12 +174,32 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, + NULL); if (err < 0) return ERR_PTR(err); return vq; } +static inline +int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); +} + +static inline +int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); +} + /** * virtio_device_ready - enable vq use in probe function * @vdev: the device diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e8d36938f09a..270cfa81830e 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, + bool ctx, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); @@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, bool weak_barriers, + bool ctx, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name); @@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9dffe0282ad4..403d86e80162 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -576,9 +576,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names, - NULL); + ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX, + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 9377c8b4ac16..d8f534025b7f 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -57,6 +57,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index f31353fac541..453ca3c21193 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -20,6 +20,7 @@ int runcycles = 10000000; int max_outstanding = INT_MAX; int batch = 1; +int param = 0; bool do_sleep = false; bool do_relax = false; @@ -86,7 +87,7 @@ void set_affinity(const char *arg) cpu = strtol(arg, &endptr, 0); assert(!*endptr); - assert(cpu >= 0 || cpu < CPU_SETSIZE); + assert(cpu >= 0 && cpu < CPU_SETSIZE); self = pthread_self(); CPU_ZERO(&cpuset); @@ -247,6 +248,11 @@ static const struct option longopts[] = { .val = 'b', }, { + .name = "param", + .has_arg = required_argument, + .val = 'p', + }, + { .name = "sleep", .has_arg = no_argument, .val = 's', @@ -274,6 +280,7 @@ static void help(void) " [--run-cycles C (default: %d)]" " [--batch b]" " [--outstanding o]" + " [--param p]" " [--sleep]" " [--relax]" " [--exit]" @@ -328,6 +335,12 @@ int main(int argc, char **argv) assert(c > 0 && c < INT_MAX); max_outstanding = c; break; + case 'p': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + param = c; + break; case 'b': c = strtol(optarg, &endptr, 0); assert(!*endptr); diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 14142faf040b..90b0133004e1 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -10,6 +10,8 @@ #include <stdbool.h> +extern int param; + extern bool do_exit; #if defined(__x86_64__) || defined(__i386__) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 635b07b4fdd3..7b22f1b20652 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -97,6 +97,9 @@ void alloc_ring(void) { int ret = ptr_ring_init(&array, ring_size, 0); assert(!ret); + /* Hacky way to poke at ring internals. Useful for testing though. */ + if (param) + array.batch = param; } /* guest side */ diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index e0445898f08f..0fecaec90d0d 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -100,7 +100,7 @@ static void vq_info_add(struct vdev_info *dev, int num) vring_init(&info->vring, num, info->ring, 4096); info->vq = vring_new_virtqueue(info->idx, info->vring.num, 4096, &dev->vdev, - true, info->ring, + true, false, info->ring, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; @@ -202,7 +202,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, test = 0; r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); - fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); + fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious); } const char optstring[] = "h"; diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 5f94f5105678..9476c616d064 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -314,7 +314,8 @@ static int parallel_test(u64 features, err(1, "Could not set affinity to cpu %u", first_cpu); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true, - guest_map, fast_vringh ? no_notify_host + false, guest_map, + fast_vringh ? no_notify_host : parallel_notify_host, never_callback_guest, "guest vq"); @@ -479,7 +480,7 @@ int main(int argc, char *argv[]) memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN)); /* Set up guest side. */ - vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, + vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); @@ -663,7 +664,7 @@ int main(int argc, char *argv[]) /* Force creation of direct, which we modify. */ __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, - __user_addr_min, + false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); |