diff options
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r-- | drivers/vhost/net.c | 64 |
1 files changed, 59 insertions, 5 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ab11b2bee273..36f3d0f49e60 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,6 +141,10 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; + /* Private page frag */ + struct page_frag page_frag; + /* Refcount bias of page frag */ + int refcnt_bias; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -513,7 +517,13 @@ static void vhost_net_busy_poll(struct vhost_net *net, struct socket *sock; struct vhost_virtqueue *vq = poll_rx ? tvq : rvq; - mutex_lock_nested(&vq->mutex, poll_rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); + /* Try to hold the vq mutex of the paired virtqueue. We can't + * use mutex_lock() here since we could not guarantee a + * consistenet lock ordering. + */ + if (!mutex_trylock(&vq->mutex)) + return; + vhost_disable_notify(&net->dev, vq); sock = rvq->private_data; @@ -637,14 +647,53 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } +#define SKB_FRAG_PAGE_ORDER get_order(32768) + +static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, + struct page_frag *pfrag, gfp_t gfp) +{ + if (pfrag->page) { + if (pfrag->offset + sz <= pfrag->size) + return true; + __page_frag_cache_drain(pfrag->page, net->refcnt_bias); + } + + pfrag->offset = 0; + net->refcnt_bias = 0; + if (SKB_FRAG_PAGE_ORDER) { + /* Avoid direct reclaim but allow kswapd to wake */ + pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | + __GFP_COMP | __GFP_NOWARN | + __GFP_NORETRY, + SKB_FRAG_PAGE_ORDER); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; + goto done; + } + } + pfrag->page = alloc_page(gfp); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE; + goto done; + } + return false; + +done: + net->refcnt_bias = USHRT_MAX; + page_ref_add(pfrag->page, USHRT_MAX - 1); + return true; +} + #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct iov_iter *from) { struct vhost_virtqueue *vq = &nvq->vq; + struct vhost_net *net = container_of(vq->dev, struct vhost_net, + dev); struct socket *sock = vq->private_data; - struct page_frag *alloc_frag = ¤t->task_frag; + struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -665,7 +714,8 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, buflen += SKB_DATA_ALIGN(len + pad); alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) + if (unlikely(!vhost_net_page_frag_refill(net, buflen, + alloc_frag, GFP_KERNEL))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; @@ -703,7 +753,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, xdp->data_end = xdp->data + len; hdr->buflen = buflen; - get_page(alloc_frag->page); + --net->refcnt_bias; alloc_frag->offset += buflen; ++nvq->batched_xdp; @@ -1292,6 +1342,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); f->private_data = n; + n->page_frag.page = NULL; + n->refcnt_bias = 0; return 0; } @@ -1359,13 +1411,15 @@ static int vhost_net_release(struct inode *inode, struct file *f) if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ - synchronize_rcu_bh(); + synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); + if (n->page_frag.page) + __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); kvfree(n); return 0; } |