diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-12 18:50:34 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-12 18:50:34 +0200 |
commit | 7a53e17accce9d310d2e522dfc701d8da7ccfa65 (patch) | |
tree | c1ccf061aee42178159cbe9c31c2c4e004b76947 /drivers/net | |
parent | Merge tag 'loongarch-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/c... (diff) | |
parent | vdpa/mlx5: Fix possible uninitialized return value (diff) | |
download | linux-7a53e17accce9d310d2e522dfc701d8da7ccfa65.tar.xz linux-7a53e17accce9d310d2e522dfc701d8da7ccfa65.zip |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- A huge patchset supporting vq resize using the new vq reset
capability
- Features, fixes, and cleanups all over the place
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
vdpa/mlx5: Fix possible uninitialized return value
vdpa_sim_blk: add support for discard and write-zeroes
vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
vdpa_sim_blk: check if sector is 0 for commands other than read or write
vdpa_sim: Implement suspend vdpa op
vhost-vdpa: uAPI to suspend the device
vhost-vdpa: introduce SUSPEND backend feature bit
vdpa: Add suspend operation
virtio-blk: Avoid use-after-free on suspend/resume
virtio_vdpa: support the arg sizes of find_vqs()
vhost-vdpa: Call ida_simple_remove() when failed
vDPA: fix 'cast to restricted le16' warnings in vdpa.c
vDPA: !FEATURES_OK should not block querying device config space
vDPA/ifcvf: support userspace to query features and MQ of a management device
vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
vhost scsi: Allow user to control num virtqueues
vhost-scsi: Fix max number of virtqueues
vdpa/mlx5: Support different address spaces for control and data
vdpa/mlx5: Implement susupend virtqueue callback
...
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/virtio_net.c | 325 |
1 files changed, 286 insertions, 39 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3b3eebad3977..d934774e9733 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -135,6 +135,9 @@ struct send_queue { struct virtnet_sq_stats stats; struct napi_struct napi; + + /* Record whether sq is in reset state. */ + bool reset; }; /* Internal representation of a receive virtqueue */ @@ -267,6 +270,12 @@ struct virtnet_info { u8 duplex; u32 speed; + /* Interrupt coalescing settings */ + u32 tx_usecs; + u32 rx_usecs; + u32 tx_max_packets; + u32 rx_max_packets; + unsigned long guest_offloads; unsigned long guest_offloads_capable; @@ -284,6 +293,9 @@ struct padded_vnet_hdr { char padding[12]; }; +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf); +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); + static bool is_xdp_frame(void *ptr) { return (unsigned long)ptr & VIRTIO_XDP_FLAG; @@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) return; if (__netif_tx_trylock(txq)) { + if (sq->reset) { + __netif_tx_unlock(txq); + return; + } + do { virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); @@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static int virtnet_rx_resize(struct virtnet_info *vi, + struct receive_queue *rq, u32 ring_num) +{ + bool running = netif_running(vi->dev); + int err, qindex; + + qindex = rq - vi->rq; + + if (running) + napi_disable(&rq->napi); + + err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf); + if (err) + netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); + + if (!try_fill_recv(vi, rq, GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + + if (running) + virtnet_napi_enable(rq->vq, &rq->napi); + return err; +} + +static int virtnet_tx_resize(struct virtnet_info *vi, + struct send_queue *sq, u32 ring_num) +{ + bool running = netif_running(vi->dev); + struct netdev_queue *txq; + int err, qindex; + + qindex = sq - vi->sq; + + if (running) + virtnet_napi_tx_disable(&sq->napi); + + txq = netdev_get_tx_queue(vi->dev, qindex); + + /* 1. wait all ximt complete + * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() + */ + __netif_tx_lock_bh(txq); + + /* Prevent rx poll from accessing sq. */ + sq->reset = true; + + /* Prevent the upper layer from trying to send packets. */ + netif_stop_subqueue(vi->dev, qindex); + + __netif_tx_unlock_bh(txq); + + err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); + if (err) + netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); + + __netif_tx_lock_bh(txq); + sq->reset = false; + netif_tx_wake_queue(txq); + __netif_tx_unlock_bh(txq); + + if (running) + virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); + return err; +} + /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should @@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev, { struct virtnet_info *vi = netdev_priv(dev); - ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq); - ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq); - ring->rx_pending = ring->rx_max_pending; - ring->tx_pending = ring->tx_max_pending; + ring->rx_max_pending = vi->rq[0].vq->num_max; + ring->tx_max_pending = vi->sq[0].vq->num_max; + ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); + ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); +} + +static int virtnet_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct virtnet_info *vi = netdev_priv(dev); + u32 rx_pending, tx_pending; + struct receive_queue *rq; + struct send_queue *sq; + int i, err; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); + tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); + + if (ring->rx_pending == rx_pending && + ring->tx_pending == tx_pending) + return 0; + + if (ring->rx_pending > vi->rq[0].vq->num_max) + return -EINVAL; + + if (ring->tx_pending > vi->sq[0].vq->num_max) + return -EINVAL; + + for (i = 0; i < vi->max_queue_pairs; i++) { + rq = vi->rq + i; + sq = vi->sq + i; + + if (ring->tx_pending != tx_pending) { + err = virtnet_tx_resize(vi, sq, ring->tx_pending); + if (err) + return err; + } + + if (ring->rx_pending != rx_pending) { + err = virtnet_rx_resize(vi, rq, ring->rx_pending); + if (err) + return err; + } + } + + return 0; } static bool virtnet_commit_rss_command(struct virtnet_info *vi) @@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, return 0; } +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, + struct ethtool_coalesce *ec) +{ + struct scatterlist sgs_tx, sgs_rx; + struct virtio_net_ctrl_coal_tx coal_tx; + struct virtio_net_ctrl_coal_rx coal_rx; + + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, + &sgs_tx)) + return -EINVAL; + + /* Save parameters */ + vi->tx_usecs = ec->tx_coalesce_usecs; + vi->tx_max_packets = ec->tx_max_coalesced_frames; + + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, + &sgs_rx)) + return -EINVAL; + + /* Save parameters */ + vi->rx_usecs = ec->rx_coalesce_usecs; + vi->rx_max_packets = ec->rx_max_coalesced_frames; + + return 0; +} + +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) +{ + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL + * feature is negotiated. + */ + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) + return -EOPNOTSUPP; + + if (ec->tx_max_coalesced_frames > 1 || + ec->rx_max_coalesced_frames != 1) + return -EINVAL; + + return 0; +} + static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); - int i, napi_weight; - - if (ec->tx_max_coalesced_frames > 1 || - ec->rx_max_coalesced_frames != 1) - return -EINVAL; + int ret, i, napi_weight; + bool update_napi = false; + /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; if (napi_weight ^ vi->sq[0].napi.weight) { if (dev->flags & IFF_UP) return -EBUSY; + else + update_napi = true; + } + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) + ret = virtnet_send_notf_coal_cmds(vi, ec); + else + ret = virtnet_coal_params_supported(ec); + + if (ret) + return ret; + + if (update_napi) { for (i = 0; i < vi->max_queue_pairs; i++) vi->sq[i].napi.weight = napi_weight; } - return 0; + return ret; } static int virtnet_get_coalesce(struct net_device *dev, @@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ethtool_coalesce ec_default = { - .cmd = ETHTOOL_GCOALESCE, - .rx_max_coalesced_frames = 1, - }; struct virtnet_info *vi = netdev_priv(dev); - memcpy(ec, &ec_default, sizeof(ec_default)); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + ec->rx_coalesce_usecs = vi->rx_usecs; + ec->tx_coalesce_usecs = vi->tx_usecs; + ec->tx_max_coalesced_frames = vi->tx_max_packets; + ec->rx_max_coalesced_frames = vi->rx_max_packets; + } else { + ec->rx_max_coalesced_frames = 1; - if (vi->sq[0].napi.weight) - ec->tx_max_coalesced_frames = 1; + if (vi->sq[0].napi.weight) + ec->tx_max_coalesced_frames = 1; + } return 0; } @@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) } static const struct ethtool_ops virtnet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, + .set_ringparam = virtnet_set_ringparam, .get_strings = virtnet_get_strings, .get_sset_count = virtnet_get_sset_count, .get_ethtool_stats = virtnet_get_ethtool_stats, @@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) +{ + if (!is_xdp_frame(buf)) + dev_kfree_skb(buf); + else + xdp_return_frame(ptr_to_xdp(buf)); +} + +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) +{ + struct virtnet_info *vi = vq->vdev->priv; + int i = vq2rxq(vq); + + if (vi->mergeable_rx_bufs) + put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(&vi->rq[i], buf); + else + put_page(virt_to_head_page(buf)); +} + static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_frame(buf)) - dev_kfree_skb(buf); - else - xdp_return_frame(ptr_to_xdp(buf)); - } + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + virtnet_sq_free_unused_buf(vq, buf); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; - - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->mergeable_rx_bufs) { - put_page(virt_to_head_page(buf)); - } else if (vi->big_packets) { - give_pages(&vi->rq[i], buf); - } else { - put_page(virt_to_head_page(buf)); - } - } + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + virtnet_rq_free_unused_buf(vq, buf); } } @@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu (unsigned int)GOOD_PACKET_LEN); } +static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes) +{ + u32 i, rx_size, tx_size; + + if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) { + rx_size = 1024; + tx_size = 1024; + + } else if (vi->speed < SPEED_40000) { + rx_size = 1024 * 4; + tx_size = 1024 * 4; + + } else { + rx_size = 1024 * 8; + tx_size = 1024 * 8; + } + + for (i = 0; i < vi->max_queue_pairs; i++) { + sizes[rxq2vq(i)] = rx_size; + sizes[txq2vq(i)] = tx_size; + } +} + static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; @@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; + u32 *sizes; bool *ctx; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by @@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ctx = NULL; } + sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL); + if (!sizes) + goto err_sizes; + /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { callbacks[total_vqs - 1] = NULL; names[total_vqs - 1] = "control"; + sizes[total_vqs - 1] = 64; } /* Allocate/initialize parameters for send/receive virtqueues */ @@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ctx[rxq2vq(i)] = true; } - ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, - names, ctx, NULL); + virtnet_config_sizes(vi, sizes); + + ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks, + names, sizes, ctx, NULL); if (ret) goto err_find; @@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) err_find: + kfree(sizes); +err_sizes: kfree(ctx); err_ctx: kfree(names); @@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + vi->rx_usecs = 0; + vi->tx_usecs = 0; + vi->tx_max_packets = 0; + vi->rx_max_packets = 0; + } + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; @@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev) vi->curr_queue_pairs = num_online_cpus(); vi->max_queue_pairs = max_queue_pairs; + virtnet_init_settings(dev); + virtnet_update_settings(vi); + /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) @@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev) netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); - virtnet_init_settings(dev); - if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { vi->failover = net_failover_create(vi->dev); if (IS_ERR(vi->failover)) { @@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL static unsigned int features[] = { VIRTNET_FEATURES, |