summaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-12 18:50:34 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-12 18:50:34 +0200
commit7a53e17accce9d310d2e522dfc701d8da7ccfa65 (patch)
treec1ccf061aee42178159cbe9c31c2c4e004b76947 /drivers/net
parentMerge tag 'loongarch-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/c... (diff)
parentvdpa/mlx5: Fix possible uninitialized return value (diff)
downloadlinux-7a53e17accce9d310d2e522dfc701d8da7ccfa65.tar.xz
linux-7a53e17accce9d310d2e522dfc701d8da7ccfa65.zip
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - A huge patchset supporting vq resize using the new vq reset capability - Features, fixes, and cleanups all over the place * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits) vdpa/mlx5: Fix possible uninitialized return value vdpa_sim_blk: add support for discard and write-zeroes vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests vdpa_sim_blk: check if sector is 0 for commands other than read or write vdpa_sim: Implement suspend vdpa op vhost-vdpa: uAPI to suspend the device vhost-vdpa: introduce SUSPEND backend feature bit vdpa: Add suspend operation virtio-blk: Avoid use-after-free on suspend/resume virtio_vdpa: support the arg sizes of find_vqs() vhost-vdpa: Call ida_simple_remove() when failed vDPA: fix 'cast to restricted le16' warnings in vdpa.c vDPA: !FEATURES_OK should not block querying device config space vDPA/ifcvf: support userspace to query features and MQ of a management device vDPA/ifcvf: get_config_size should return a value no greater than dev implementation vhost scsi: Allow user to control num virtqueues vhost-scsi: Fix max number of virtqueues vdpa/mlx5: Support different address spaces for control and data vdpa/mlx5: Implement susupend virtqueue callback ...
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/virtio_net.c325
1 files changed, 286 insertions, 39 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3b3eebad3977..d934774e9733 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -135,6 +135,9 @@ struct send_queue {
struct virtnet_sq_stats stats;
struct napi_struct napi;
+
+ /* Record whether sq is in reset state. */
+ bool reset;
};
/* Internal representation of a receive virtqueue */
@@ -267,6 +270,12 @@ struct virtnet_info {
u8 duplex;
u32 speed;
+ /* Interrupt coalescing settings */
+ u32 tx_usecs;
+ u32 rx_usecs;
+ u32 tx_max_packets;
+ u32 rx_max_packets;
+
unsigned long guest_offloads;
unsigned long guest_offloads_capable;
@@ -284,6 +293,9 @@ struct padded_vnet_hdr {
char padding[12];
};
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+
static bool is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
return;
if (__netif_tx_trylock(txq)) {
+ if (sq->reset) {
+ __netif_tx_unlock(txq);
+ return;
+ }
+
do {
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true);
@@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static int virtnet_rx_resize(struct virtnet_info *vi,
+ struct receive_queue *rq, u32 ring_num)
+{
+ bool running = netif_running(vi->dev);
+ int err, qindex;
+
+ qindex = rq - vi->rq;
+
+ if (running)
+ napi_disable(&rq->napi);
+
+ err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
+ if (err)
+ netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
+
+ if (!try_fill_recv(vi, rq, GFP_KERNEL))
+ schedule_delayed_work(&vi->refill, 0);
+
+ if (running)
+ virtnet_napi_enable(rq->vq, &rq->napi);
+ return err;
+}
+
+static int virtnet_tx_resize(struct virtnet_info *vi,
+ struct send_queue *sq, u32 ring_num)
+{
+ bool running = netif_running(vi->dev);
+ struct netdev_queue *txq;
+ int err, qindex;
+
+ qindex = sq - vi->sq;
+
+ if (running)
+ virtnet_napi_tx_disable(&sq->napi);
+
+ txq = netdev_get_tx_queue(vi->dev, qindex);
+
+ /* 1. wait all ximt complete
+ * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
+ */
+ __netif_tx_lock_bh(txq);
+
+ /* Prevent rx poll from accessing sq. */
+ sq->reset = true;
+
+ /* Prevent the upper layer from trying to send packets. */
+ netif_stop_subqueue(vi->dev, qindex);
+
+ __netif_tx_unlock_bh(txq);
+
+ err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+ if (err)
+ netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
+
+ __netif_tx_lock_bh(txq);
+ sq->reset = false;
+ netif_tx_wake_queue(txq);
+ __netif_tx_unlock_bh(txq);
+
+ if (running)
+ virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+ return err;
+}
+
/*
* Send command via the control virtqueue and check status. Commands
* supported by the hypervisor, as indicated by feature bits, should
@@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
{
struct virtnet_info *vi = netdev_priv(dev);
- ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
- ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
- ring->rx_pending = ring->rx_max_pending;
- ring->tx_pending = ring->tx_max_pending;
+ ring->rx_max_pending = vi->rq[0].vq->num_max;
+ ring->tx_max_pending = vi->sq[0].vq->num_max;
+ ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+ ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+}
+
+static int virtnet_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ u32 rx_pending, tx_pending;
+ struct receive_queue *rq;
+ struct send_queue *sq;
+ int i, err;
+
+ if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+ return -EINVAL;
+
+ rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+ tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+
+ if (ring->rx_pending == rx_pending &&
+ ring->tx_pending == tx_pending)
+ return 0;
+
+ if (ring->rx_pending > vi->rq[0].vq->num_max)
+ return -EINVAL;
+
+ if (ring->tx_pending > vi->sq[0].vq->num_max)
+ return -EINVAL;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rq = vi->rq + i;
+ sq = vi->sq + i;
+
+ if (ring->tx_pending != tx_pending) {
+ err = virtnet_tx_resize(vi, sq, ring->tx_pending);
+ if (err)
+ return err;
+ }
+
+ if (ring->rx_pending != rx_pending) {
+ err = virtnet_rx_resize(vi, rq, ring->rx_pending);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
}
static bool virtnet_commit_rss_command(struct virtnet_info *vi)
@@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
return 0;
}
+static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
+ struct ethtool_coalesce *ec)
+{
+ struct scatterlist sgs_tx, sgs_rx;
+ struct virtio_net_ctrl_coal_tx coal_tx;
+ struct virtio_net_ctrl_coal_rx coal_rx;
+
+ coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+ coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+ sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+
+ if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+ VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
+ &sgs_tx))
+ return -EINVAL;
+
+ /* Save parameters */
+ vi->tx_usecs = ec->tx_coalesce_usecs;
+ vi->tx_max_packets = ec->tx_max_coalesced_frames;
+
+ coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+ coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+ sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+
+ if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+ VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
+ &sgs_rx))
+ return -EINVAL;
+
+ /* Save parameters */
+ vi->rx_usecs = ec->rx_coalesce_usecs;
+ vi->rx_max_packets = ec->rx_max_coalesced_frames;
+
+ return 0;
+}
+
+static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
+{
+ /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
+ * feature is negotiated.
+ */
+ if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
+ return -EOPNOTSUPP;
+
+ if (ec->tx_max_coalesced_frames > 1 ||
+ ec->rx_max_coalesced_frames != 1)
+ return -EINVAL;
+
+ return 0;
+}
+
static int virtnet_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
- int i, napi_weight;
-
- if (ec->tx_max_coalesced_frames > 1 ||
- ec->rx_max_coalesced_frames != 1)
- return -EINVAL;
+ int ret, i, napi_weight;
+ bool update_napi = false;
+ /* Can't change NAPI weight if the link is up */
napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
if (napi_weight ^ vi->sq[0].napi.weight) {
if (dev->flags & IFF_UP)
return -EBUSY;
+ else
+ update_napi = true;
+ }
+
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
+ ret = virtnet_send_notf_coal_cmds(vi, ec);
+ else
+ ret = virtnet_coal_params_supported(ec);
+
+ if (ret)
+ return ret;
+
+ if (update_napi) {
for (i = 0; i < vi->max_queue_pairs; i++)
vi->sq[i].napi.weight = napi_weight;
}
- return 0;
+ return ret;
}
static int virtnet_get_coalesce(struct net_device *dev,
@@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
- struct ethtool_coalesce ec_default = {
- .cmd = ETHTOOL_GCOALESCE,
- .rx_max_coalesced_frames = 1,
- };
struct virtnet_info *vi = netdev_priv(dev);
- memcpy(ec, &ec_default, sizeof(ec_default));
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+ ec->rx_coalesce_usecs = vi->rx_usecs;
+ ec->tx_coalesce_usecs = vi->tx_usecs;
+ ec->tx_max_coalesced_frames = vi->tx_max_packets;
+ ec->rx_max_coalesced_frames = vi->rx_max_packets;
+ } else {
+ ec->rx_max_coalesced_frames = 1;
- if (vi->sq[0].napi.weight)
- ec->tx_max_coalesced_frames = 1;
+ if (vi->sq[0].napi.weight)
+ ec->tx_max_coalesced_frames = 1;
+ }
return 0;
}
@@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
}
static const struct ethtool_ops virtnet_ethtool_ops = {
- .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
+ .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USECS,
.get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam,
+ .set_ringparam = virtnet_set_ringparam,
.get_strings = virtnet_get_strings,
.get_sset_count = virtnet_get_sset_count,
.get_ethtool_stats = virtnet_get_ethtool_stats,
@@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi)
put_page(vi->rq[i].alloc_frag.page);
}
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+ if (!is_xdp_frame(buf))
+ dev_kfree_skb(buf);
+ else
+ xdp_return_frame(ptr_to_xdp(buf));
+}
+
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+ struct virtnet_info *vi = vq->vdev->priv;
+ int i = vq2rxq(vq);
+
+ if (vi->mergeable_rx_bufs)
+ put_page(virt_to_head_page(buf));
+ else if (vi->big_packets)
+ give_pages(&vi->rq[i], buf);
+ else
+ put_page(virt_to_head_page(buf));
+}
+
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
@@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi)
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq;
- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (!is_xdp_frame(buf))
- dev_kfree_skb(buf);
- else
- xdp_return_frame(ptr_to_xdp(buf));
- }
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+ virtnet_sq_free_unused_buf(vq, buf);
}
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->rq[i].vq;
-
- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (vi->mergeable_rx_bufs) {
- put_page(virt_to_head_page(buf));
- } else if (vi->big_packets) {
- give_pages(&vi->rq[i], buf);
- } else {
- put_page(virt_to_head_page(buf));
- }
- }
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+ virtnet_rq_free_unused_buf(vq, buf);
}
}
@@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
(unsigned int)GOOD_PACKET_LEN);
}
+static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)
+{
+ u32 i, rx_size, tx_size;
+
+ if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) {
+ rx_size = 1024;
+ tx_size = 1024;
+
+ } else if (vi->speed < SPEED_40000) {
+ rx_size = 1024 * 4;
+ tx_size = 1024 * 4;
+
+ } else {
+ rx_size = 1024 * 8;
+ tx_size = 1024 * 8;
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ sizes[rxq2vq(i)] = rx_size;
+ sizes[txq2vq(i)] = tx_size;
+ }
+}
+
static int virtnet_find_vqs(struct virtnet_info *vi)
{
vq_callback_t **callbacks;
@@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
int ret = -ENOMEM;
int i, total_vqs;
const char **names;
+ u32 *sizes;
bool *ctx;
/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
@@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx = NULL;
}
+ sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);
+ if (!sizes)
+ goto err_sizes;
+
/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
callbacks[total_vqs - 1] = NULL;
names[total_vqs - 1] = "control";
+ sizes[total_vqs - 1] = 64;
}
/* Allocate/initialize parameters for send/receive virtqueues */
@@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx[rxq2vq(i)] = true;
}
- ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
- names, ctx, NULL);
+ virtnet_config_sizes(vi, sizes);
+
+ ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
+ names, sizes, ctx, NULL);
if (ret)
goto err_find;
@@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
err_find:
+ kfree(sizes);
+err_sizes:
kfree(ctx);
err_ctx:
kfree(names);
@@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
+ "VIRTIO_NET_F_CTRL_VQ") ||
+ VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ"))) {
return false;
}
@@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+ vi->rx_usecs = 0;
+ vi->tx_usecs = 0;
+ vi->tx_max_packets = 0;
+ vi->rx_max_packets = 0;
+ }
+
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
@@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->curr_queue_pairs = num_online_cpus();
vi->max_queue_pairs = max_queue_pairs;
+ virtnet_init_settings(dev);
+ virtnet_update_settings(vi);
+
/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi);
if (err)
@@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev)
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
- virtnet_init_settings(dev);
-
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
vi->failover = net_failover_create(vi->dev);
if (IS_ERR(vi->failover)) {
@@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
- VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
+ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
static unsigned int features[] = {
VIRTNET_FEATURES,