Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 237 |
1 file changed, 133 insertions, 104 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index defec2b3c5a4..bf7c734259ad 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -124,12 +124,14 @@ struct virtnet_info {
 	/* Lock for config space updates */
 	struct mutex config_lock;
 
+	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
+	 * low on memory.
+	 */
+	struct page_frag alloc_frag;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
-	/* Per-cpu variable to show the mapping from CPU to virtqueue */
-	int __percpu *vq_index;
-
 	/* CPU hot plug notifier */
 	struct notifier_block nb;
 };
@@ -217,33 +219,18 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static void set_skb_frag(struct sk_buff *skb, struct page *page,
-			 unsigned int offset, unsigned int *len)
-{
-	int size = min((unsigned)PAGE_SIZE - offset, *len);
-	int i = skb_shinfo(skb)->nr_frags;
-
-	__skb_fill_page_desc(skb, i, page, offset, size);
-
-	skb->data_len += size;
-	skb->len += size;
-	skb->truesize += PAGE_SIZE;
-	skb_shinfo(skb)->nr_frags++;
-	skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-	*len -= size;
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
-				   struct page *page, unsigned int len)
+				   struct page *page, unsigned int offset,
+				   unsigned int len, unsigned int truesize)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
-	unsigned int copy, hdr_len, offset;
+	unsigned int copy, hdr_len, hdr_padded_len;
 	char *p;
 
-	p = page_address(page);
+	p = page_address(page) + offset;
 
 	/* copy small packet so we can reuse these pages for small data */
 	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
@@ -254,16 +241,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 
 	if (vi->mergeable_rx_bufs) {
 		hdr_len = sizeof hdr->mhdr;
-		offset = hdr_len;
+		hdr_padded_len = sizeof hdr->mhdr;
 	} else {
 		hdr_len = sizeof hdr->hdr;
-		offset = sizeof(struct padded_vnet_hdr);
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 	}
 
 	memcpy(hdr, p, hdr_len);
 
 	len -= hdr_len;
-	p += offset;
+	offset += hdr_padded_len;
+	p += hdr_padded_len;
 
 	copy = len;
 	if (copy > skb_tailroom(skb))
@@ -273,6 +261,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 	len -= copy;
 	offset += copy;
 
+	if (vi->mergeable_rx_bufs) {
+		if (len)
+			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
+		else
+			put_page(page);
+		return skb;
+	}
+
 	/*
 	 * Verify that we can indeed put this data into a skb.
 	 * This is here to handle cases when the device erroneously
@@ -284,9 +280,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 		dev_kfree_skb(skb);
 		return NULL;
 	}
-
+	BUG_ON(offset >= PAGE_SIZE);
 	while (len) {
-		set_skb_frag(skb, page, offset, &len);
+		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
+				frag_size, truesize);
+		len -= frag_size;
 		page = (struct page *)page->private;
 		offset = 0;
 	}
@@ -297,33 +296,59 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 	return skb;
 }
 
-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
 {
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+	struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
+	struct sk_buff *curr_skb = head_skb;
+	char *buf;
 	struct page *page;
-	int num_buf, i, len;
+	int num_buf, len, offset;
 
 	num_buf = hdr->mhdr.num_buffers;
 	while (--num_buf) {
-		i = skb_shinfo(skb)->nr_frags;
-		if (i >= MAX_SKB_FRAGS) {
-			pr_debug("%s: packet too long\n", skb->dev->name);
-			skb->dev->stats.rx_length_errors++;
-			return -EINVAL;
-		}
-		page = virtqueue_get_buf(rq->vq, &len);
-		if (!page) {
+		int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!buf)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
-				 skb->dev->name, hdr->mhdr.num_buffers);
-			skb->dev->stats.rx_length_errors++;
+				 head_skb->dev->name, hdr->mhdr.num_buffers);
+			head_skb->dev->stats.rx_length_errors++;
 			return -EINVAL;
 		}
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-
-		set_skb_frag(skb, page, 0, &len);
-
+		if (unlikely(len > MAX_PACKET_LEN)) {
+			pr_debug("%s: rx error: merge buffer too long\n",
+				 head_skb->dev->name);
+			len = MAX_PACKET_LEN;
+		}
+		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
+			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
+			if (unlikely(!nskb)) {
+				head_skb->dev->stats.rx_dropped++;
+				return -ENOMEM;
+			}
+			if (curr_skb == head_skb)
+				skb_shinfo(curr_skb)->frag_list = nskb;
+			else
+				curr_skb->next = nskb;
+			curr_skb = nskb;
+			head_skb->truesize += nskb->truesize;
+			num_skb_frags = 0;
+		}
+		if (curr_skb != head_skb) {
+			head_skb->data_len += len;
+			head_skb->len += len;
+			head_skb->truesize += MAX_PACKET_LEN;
+		}
+		page = virt_to_head_page(buf);
+		offset = buf - (char *)page_address(page);
+		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
+			put_page(page);
+			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
+					     len, MAX_PACKET_LEN);
+		} else {
+			skb_add_rx_frag(curr_skb, num_skb_frags, page,
+					offset, len,
+					MAX_PACKET_LEN);
+		}
 		--rq->num;
 	}
 	return 0;
@@ -341,8 +366,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs || vi->big_packets)
+		if (vi->big_packets)
 			give_pages(rq, buf);
+		else if (vi->mergeable_rx_bufs)
+			put_page(virt_to_head_page(buf));
 		else
 			dev_kfree_skb(buf);
 		return;
@@ -352,19 +379,28 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		skb = buf;
 		len -= sizeof(struct virtio_net_hdr);
 		skb_trim(skb, len);
+	} else if (vi->mergeable_rx_bufs) {
+		struct page *page = virt_to_head_page(buf);
+		skb = page_to_skb(rq, page,
+				  (char *)buf - (char *)page_address(page),
+				  len, MAX_PACKET_LEN);
+		if (unlikely(!skb)) {
+			dev->stats.rx_dropped++;
+			put_page(page);
+			return;
+		}
+		if (receive_mergeable(rq, skb)) {
+			dev_kfree_skb(skb);
+			return;
+		}
 	} else {
 		page = buf;
-		skb = page_to_skb(rq, page, len);
+		skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
 			give_pages(rq, page);
 			return;
 		}
-		if (vi->mergeable_rx_bufs)
-			if (receive_mergeable(rq, skb)) {
-				dev_kfree_skb(skb);
-				return;
-			}
 	}
 
 	hdr = skb_vnet_hdr(skb);
@@ -501,18 +537,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-	struct page *page;
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	char *buf = NULL;
 	int err;
 
-	page = get_a_page(rq, gfp);
-	if (!page)
+	if (gfp & __GFP_WAIT) {
+		if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag,
+					 gfp)) {
+			buf = (char *)page_address(vi->alloc_frag.page) +
+			      vi->alloc_frag.offset;
+			get_page(vi->alloc_frag.page);
+			vi->alloc_frag.offset += MAX_PACKET_LEN;
+		}
+	} else {
+		buf = netdev_alloc_frag(MAX_PACKET_LEN);
+	}
+	if (!buf)
 		return -ENOMEM;
 
-	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
-
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
+	sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
 	if (err < 0)
-		give_pages(rq, page);
+		put_page(virt_to_head_page(buf));
 
 	return err;
 }
@@ -938,7 +984,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 		return -EINVAL;
 	} else {
 		vi->curr_queue_pairs = queue_pairs;
-		schedule_delayed_work(&vi->refill, 0);
+		/* virtnet_open() will refill when device is going to up. */
+		if (dev->flags & IFF_UP)
+			schedule_delayed_work(&vi->refill, 0);
 	}
 
 	return 0;
@@ -1063,7 +1111,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 {
 	int i;
-	int cpu;
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1073,16 +1120,6 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 		vi->affinity_hint_set = false;
 	}
-
-	i = 0;
-	for_each_online_cpu(cpu) {
-		if (cpu == hcpu) {
-			*per_cpu_ptr(vi->vq_index, cpu) = -1;
-		} else {
-			*per_cpu_ptr(vi->vq_index, cpu) =
-				++i % vi->curr_queue_pairs;
-		}
-	}
 }
 
 static void virtnet_set_affinity(struct virtnet_info *vi)
@@ -1104,7 +1141,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 	for_each_online_cpu(cpu) {
 		virtqueue_set_affinity(vi->rq[i].vq, cpu);
 		virtqueue_set_affinity(vi->sq[i].vq, cpu);
-		*per_cpu_ptr(vi->vq_index, cpu) = i;
+		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
 		i++;
 	}
 
@@ -1128,6 +1165,7 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 	default:
 		break;
 	}
+
 	return NOTIFY_OK;
 }
 
@@ -1217,28 +1255,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/* To avoid contending a lock hold by a vcpu who would exit to host, select the
- * txq based on the processor id.
- */
-static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-	int txq;
-	struct virtnet_info *vi = netdev_priv(dev);
-
-	if (skb_rx_queue_recorded(skb)) {
-		txq = skb_get_rx_queue(skb);
-	} else {
-		txq = *__this_cpu_ptr(vi->vq_index);
-		if (txq == -1)
-			txq = 0;
-	}
-
-	while (unlikely(txq >= dev->real_num_tx_queues))
-		txq -= dev->real_num_tx_queues;
-
-	return txq;
-}
-
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
@@ -1250,7 +1266,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_get_stats64     = virtnet_stats,
 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
-	.ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1333,8 +1348,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs || vi->big_packets)
+			if (vi->big_packets)
 				give_pages(&vi->rq[i], buf);
+			else if (vi->mergeable_rx_bufs)
+				put_page(virt_to_head_page(buf));
 			else
 				dev_kfree_skb(buf);
 			--vi->rq[i].num;
@@ -1559,9 +1576,12 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->stats == NULL)
 		goto free;
 
-	vi->vq_index = alloc_percpu(int);
-	if (vi->vq_index == NULL)
-		goto free_stats;
+	for_each_possible_cpu(i) {
+		struct virtnet_stats *virtnet_stats;
+		virtnet_stats = per_cpu_ptr(vi->stats, i);
+		u64_stats_init(&virtnet_stats->tx_syncp);
+		u64_stats_init(&virtnet_stats->rx_syncp);
+	}
 
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
@@ -1589,7 +1609,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
 	err = init_vqs(vi);
 	if (err)
-		goto free_index;
+		goto free_stats;
 
 	netif_set_real_num_tx_queues(dev, 1);
 	netif_set_real_num_rx_queues(dev, 1);
@@ -1640,8 +1660,8 @@ free_recv_bufs:
 free_vqs:
	cancel_delayed_work_sync(&vi->refill);
 	virtnet_del_vqs(vi);
-free_index:
-	free_percpu(vi->vq_index);
+	if (vi->alloc_frag.page)
+		put_page(vi->alloc_frag.page);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1675,10 +1695,11 @@ static void virtnet_remove(struct virtio_device *vdev)
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
+	if (vi->alloc_frag.page)
+		put_page(vi->alloc_frag.page);
 
 	flush_work(&vi->config_work);
 
-	free_percpu(vi->vq_index);
 	free_percpu(vi->stats);
 	free_netdev(vi->dev);
 }
@@ -1689,6 +1710,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	struct virtnet_info *vi = vdev->priv;
 	int i;
 
+	unregister_hotcpu_notifier(&vi->nb);
+
 	/* Prevent config work handler from accessing the device */
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = false;
@@ -1733,7 +1756,13 @@ static int virtnet_restore(struct virtio_device *vdev)
 	vi->config_enable = true;
 	mutex_unlock(&vi->config_lock);
 
+	rtnl_lock();
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
+	rtnl_unlock();
+
+	err = register_hotcpu_notifier(&vi->nb);
+	if (err)
+		return err;
 
 	return 0;
 }