author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 23:00:39 +0100
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 23:00:39 +0100
commit | 43e1b12927276cde8052122a24ff796649f09d60 (patch)
tree | e1629f35aca8b865691932bea03d51517926acb0
parent | Merge tag 'vfio-v5.16-rc1' of git://github.com/awilliam/linux-vfio (diff)
parent | vdpa/mlx5: Forward only packets with allowed MAC address (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
"vhost and virtio fixes and features:
- Hardening work by Jason
- vdpa driver for Alibaba ENI
- Performance tweaks for virtio blk
- virtio rng rework using an internal buffer
- mac/mtu programming for mlx5 vdpa
- Misc fixes, cleanups"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (45 commits)
vdpa/mlx5: Forward only packets with allowed MAC address
vdpa/mlx5: Support configuration of MAC
vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit
vdpa_sim_net: Enable user to set mac address and mtu
vdpa: Enable user to set mac and mtu of vdpa device
vdpa: Use kernel coding style for structure comments
vdpa: Introduce query of device config layout
vdpa: Introduce and use vdpa device get, set config helpers
virtio-scsi: don't let virtio core to validate used buffer length
virtio-blk: don't let virtio core to validate used length
virtio-net: don't let virtio core to validate used length
virtio_ring: validate used buffer length
virtio_blk: correct types for status handling
virtio_blk: allow 0 as num_request_queues
i2c: virtio: Add support for zero-length requests
virtio-blk: fixup coccinelle warnings
virtio_ring: fix typos in vring_desc_extra
virtio-pci: harden INTX interrupts
virtio_pci: harden MSI-X interrupts
virtio_config: introduce a new .enable_cbs method
...
36 files changed, 1780 insertions(+), 296 deletions(-)
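The "performance tweaks for virtio blk" above include a new num_request_queues module parameter (see the virtio_blk.c hunk below) that caps how many request queues the driver uses: zero means no limit, and nr_cpu_ids always bounds the result. As a rough illustration of just that clamping rule, here is a minimal stand-alone C sketch; min_not_zero() re-implements the semantics of the kernel macro of the same name, and all other names are illustrative, not the driver's API.

/* Sketch of how the num_request_queues parameter bounds the queue count.
 * min_not_zero() treats 0 as "unset", so an unset user limit never wins. */
#include <assert.h>
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
    return a < b ? a : b;
}

static unsigned int min_not_zero(unsigned int a, unsigned int b)
{
    if (a == 0)
        return b;
    if (b == 0)
        return a;
    return min_u(a, b);
}

/* num_vqs: queues offered by the device; nr_cpu_ids: CPU count;
 * num_request_queues: the user-set limit (0 = no limit). */
static unsigned int clamp_num_vqs(unsigned int num_vqs,
                                  unsigned int nr_cpu_ids,
                                  unsigned int num_request_queues)
{
    return min_u(min_not_zero(num_request_queues, nr_cpu_ids), num_vqs);
}

int main(void)
{
    assert(clamp_num_vqs(8, 4, 0) == 4); /* no user limit: CPUs cap it */
    assert(clamp_num_vqs(8, 4, 2) == 2); /* user limit wins */
    assert(clamp_num_vqs(1, 4, 2) == 1); /* device offers fewer queues */
    printf("ok\n");
    return 0;
}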
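The virtio-rng rework replaces per-read buffer registration with a driver-owned cache: one buffer is posted to the device, readers drain it through copy_data(), and the request is re-armed once the cache is empty. The stand-alone C sketch below models that drain-and-refill flow under simplifying assumptions (no blocking or completion handling, and a fake fill_from_device() standing in for the virtqueue callback); the names are illustrative, not the driver's API.

#include <stdio.h>
#include <string.h>

#define CACHE_SZ 64

struct rng_cache {
    unsigned char data[CACHE_SZ];
    unsigned int avail; /* bytes not yet handed to readers */
    unsigned int idx;   /* read cursor into data[] */
};

/* Stand-in for the host refilling the posted buffer. */
static void fill_from_device(struct rng_cache *c)
{
    memset(c->data, 0xa5, sizeof(c->data)); /* pretend randomness */
    c->avail = sizeof(c->data);
    c->idx = 0;
}

/* Mirrors the shape of copy_data(): serve from the cache, refill when
 * drained (the real driver instead re-posts the buffer to the device). */
static unsigned int copy_data(struct rng_cache *c, void *buf, unsigned int size)
{
    if (size > c->avail)
        size = c->avail;
    memcpy(buf, c->data + c->idx, size);
    c->idx += size;
    c->avail -= size;
    if (c->avail == 0)
        fill_from_device(c);
    return size;
}

int main(void)
{
    struct rng_cache c;
    unsigned char out[100];
    unsigned int got = 0;

    fill_from_device(&c);
    while (got < sizeof(out))
        got += copy_data(&c, out + got, sizeof(out) - got);
    printf("read %u bytes\n", got);
    return 0;
}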
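The mac/mtu programming items rest on a new provisioning contract, visible in the vdpa.c hunks below: each management device advertises the config attributes it accepts in config_attr_mask, and the netlink dev-add handler rejects requests asking for anything outside that mask. A minimal sketch of the mask check only, with illustrative constants standing in for the uapi attribute ids:

#include <errno.h>
#include <stdio.h>

#define CFG_ATTR_MACADDR (1u << 1)
#define CFG_ATTR_MTU     (1u << 2)

/* 0 if every requested attribute is supported, else -EOPNOTSUPP. */
static int check_config_mask(unsigned int requested, unsigned int supported)
{
    if ((requested & supported) != requested)
        return -EOPNOTSUPP;
    return 0;
}

int main(void)
{
    unsigned int sim_mask = CFG_ATTR_MACADDR | CFG_ATTR_MTU; /* vdpa_sim_net */
    unsigned int mlx5_mask = CFG_ATTR_MACADDR;               /* mlx5_vdpa */

    printf("sim  mac+mtu: %d\n",
           check_config_mask(CFG_ATTR_MACADDR | CFG_ATTR_MTU, sim_mask));
    printf("mlx5 mac+mtu: %d\n",
           check_config_mask(CFG_ATTR_MACADDR | CFG_ATTR_MTU, mlx5_mask));
    printf("mlx5 mac    : %d\n",
           check_config_mask(CFG_ATTR_MACADDR, mlx5_mask));
    return 0;
}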
diff --git a/MAINTAINERS b/MAINTAINERS
index 2c9070aeba2a..9ebd990d9edd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20083,6 +20083,13 @@ S:	Maintained
 F:	drivers/i2c/busses/i2c-virtio.c
 F:	include/uapi/linux/virtio_i2c.h
 
+VIRTIO PMEM DRIVER
+M:	Pankaj Gupta <pankaj.gupta.linux@gmail.com>
+L:	virtualization@lists.linux-foundation.org
+S:	Maintained
+F:	drivers/nvdimm/virtio_pmem.c
+F:	drivers/nvdimm/nd_virtio.c
+
 VIRTUAL BOX GUEST DEVICE DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 M:	Arnd Bergmann <arnd@arndb.de>
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index d97eaf6adb6d..2a51dfb09c8f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -371,6 +371,7 @@ config XEN_BLKDEV_BACKEND
 config VIRTIO_BLK
     tristate "Virtio block driver"
     depends on VIRTIO
+    select SG_POOL
     help
       This is the virtual block driver for virtio. It can be used with
       QEMU based VMMs (like KVM or Xen). Say Y or M.
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index fc4fc951dba7..97bf051a50ce 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -24,6 +24,19 @@
 /* The maximum number of sg elements that fit into a virtqueue */
 #define VIRTIO_BLK_MAX_SG_ELEMS 32768
 
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
+#define VIRTIO_BLK_INLINE_SG_CNT    0
+#else
+#define VIRTIO_BLK_INLINE_SG_CNT    2
+#endif
+
+static unsigned int num_request_queues;
+module_param(num_request_queues, uint, 0644);
+MODULE_PARM_DESC(num_request_queues,
+         "Limit the number of request queues to use for blk device. "
+         "0 for no limit. "
+         "Values > nr_cpu_ids truncated to nr_cpu_ids.");
+
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -77,6 +90,7 @@ struct virtio_blk {
 struct virtblk_req {
     struct virtio_blk_outhdr out_hdr;
     u8 status;
+    struct sg_table sg_table;
     struct scatterlist sg[];
 };
 
@@ -162,12 +176,93 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
     return 0;
 }
 
-static inline void virtblk_request_done(struct request *req)
+static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
 {
-    struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+    if (blk_rq_nr_phys_segments(req))
+        sg_free_table_chained(&vbr->sg_table,
+                      VIRTIO_BLK_INLINE_SG_CNT);
+}
+
+static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
+        struct virtblk_req *vbr)
+{
+    int err;
+
+    if (!blk_rq_nr_phys_segments(req))
+        return 0;
+
+    vbr->sg_table.sgl = vbr->sg;
+    err = sg_alloc_table_chained(&vbr->sg_table,
+                     blk_rq_nr_phys_segments(req),
+                     vbr->sg_table.sgl,
+                     VIRTIO_BLK_INLINE_SG_CNT);
+    if (unlikely(err))
+        return -ENOMEM;
+
+    return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
+}
+
+static void virtblk_cleanup_cmd(struct request *req)
+{
     if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
         kfree(bvec_virt(&req->special_vec));
+}
+
+static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
+                      struct request *req,
+                      struct virtblk_req *vbr)
+{
+    bool unmap = false;
+    u32 type;
+
+    vbr->out_hdr.sector = 0;
+
+    switch (req_op(req)) {
+    case REQ_OP_READ:
+        type = VIRTIO_BLK_T_IN;
+        vbr->out_hdr.sector = cpu_to_virtio64(vdev,
+                              blk_rq_pos(req));
+        break;
+    case REQ_OP_WRITE:
+        type = VIRTIO_BLK_T_OUT;
+        vbr->out_hdr.sector = cpu_to_virtio64(vdev,
+                              blk_rq_pos(req));
+        break;
+    case REQ_OP_FLUSH:
+        type = VIRTIO_BLK_T_FLUSH;
+        break;
+    case REQ_OP_DISCARD:
+        type = VIRTIO_BLK_T_DISCARD;
+        break;
+    case REQ_OP_WRITE_ZEROES:
+        type = VIRTIO_BLK_T_WRITE_ZEROES;
+        unmap = !(req->cmd_flags & REQ_NOUNMAP);
+        break;
+    case REQ_OP_DRV_IN:
+        type = VIRTIO_BLK_T_GET_ID;
+        break;
+    default:
+        WARN_ON_ONCE(1);
+        return BLK_STS_IOERR;
+    }
+
+    vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
+    vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
+
+    if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+        if (virtblk_setup_discard_write_zeroes(req, unmap))
+            return BLK_STS_RESOURCE;
+    }
+
+    return 0;
+}
+
+static inline void virtblk_request_done(struct request *req)
+{
+    struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+
+    virtblk_unmap_data(req, vbr);
+    virtblk_cleanup_cmd(req);
     blk_mq_end_request(req, virtblk_result(vbr));
 }
 
@@ -223,59 +318,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
     unsigned long flags;
     unsigned int num;
     int qid = hctx->queue_num;
-    int err;
     bool notify = false;
-    bool unmap = false;
-    u32 type;
+    blk_status_t status;
+    int err;
 
     BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
-    switch (req_op(req)) {
-    case REQ_OP_READ:
-    case REQ_OP_WRITE:
-        type = 0;
-        break;
-    case REQ_OP_FLUSH:
-        type = VIRTIO_BLK_T_FLUSH;
-        break;
-    case REQ_OP_DISCARD:
-        type = VIRTIO_BLK_T_DISCARD;
-        break;
-    case REQ_OP_WRITE_ZEROES:
-        type = VIRTIO_BLK_T_WRITE_ZEROES;
-        unmap = !(req->cmd_flags & REQ_NOUNMAP);
-        break;
-    case REQ_OP_DRV_IN:
-        type = VIRTIO_BLK_T_GET_ID;
-        break;
-    default:
-        WARN_ON_ONCE(1);
-        return BLK_STS_IOERR;
-    }
-
-    vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
-    vbr->out_hdr.sector = type ?
-        0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
-    vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
+    status = virtblk_setup_cmd(vblk->vdev, req, vbr);
+    if (unlikely(status))
+        return status;
 
     blk_mq_start_request(req);
 
-    if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
-        err = virtblk_setup_discard_write_zeroes(req, unmap);
-        if (err)
-            return BLK_STS_RESOURCE;
-    }
-
-    num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
-    if (num) {
-        if (rq_data_dir(req) == WRITE)
-            vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
-        else
-            vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
+    num = virtblk_map_data(hctx, req, vbr);
+    if (unlikely(num < 0)) {
+        virtblk_cleanup_cmd(req);
+        return BLK_STS_RESOURCE;
     }
 
     spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
-    err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
+    err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
     if (err) {
         virtqueue_kick(vblk->vqs[qid].vq);
         /* Don't stop the queue if -ENOMEM: we may have failed to
@@ -284,6 +346,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
         if (err == -ENOSPC)
             blk_mq_stop_hw_queue(hctx);
         spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
+        virtblk_unmap_data(req, vbr);
+        virtblk_cleanup_cmd(req);
         switch (err) {
         case -ENOSPC:
             return BLK_STS_DEV_RESOURCE;
@@ -497,8 +561,14 @@ static int init_vq(struct virtio_blk *vblk)
                    &num_vqs);
     if (err)
         num_vqs = 1;
+    if (!err && !num_vqs) {
+        dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
+        return -EINVAL;
+    }
 
-    num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
+    num_vqs = min_t(unsigned int,
+            min_not_zero(num_request_queues, nr_cpu_ids),
+            num_vqs);
 
     vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
     if (!vblk->vqs)
@@ -624,7 +694,7 @@ cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
     u8 writeback = virtblk_get_cache_mode(vblk->vdev);
 
     BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
-    return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
+    return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
 }
 
 static DEVICE_ATTR_RW(cache_type);
@@ -660,16 +730,6 @@ static const struct attribute_group *virtblk_attr_groups[] = {
     NULL,
 };
 
-static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
-        unsigned int hctx_idx, unsigned int numa_node)
-{
-    struct virtio_blk *vblk = set->driver_data;
-    struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
-
-    sg_init_table(vbr->sg, vblk->sg_elems);
-    return 0;
-}
-
 static int virtblk_map_queues(struct blk_mq_tag_set *set)
 {
     struct virtio_blk *vblk = set->driver_data;
@@ -682,7 +742,6 @@ static const struct blk_mq_ops virtio_mq_ops = {
     .queue_rq   = virtio_queue_rq,
     .commit_rqs = virtio_commit_rqs,
     .complete   = virtblk_request_done,
-    .init_request   = virtblk_init_request,
     .map_queues = virtblk_map_queues,
 };
 
@@ -762,7 +821,7 @@ static int virtblk_probe(struct virtio_device *vdev)
     vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
     vblk->tag_set.cmd_size =
         sizeof(struct virtblk_req) +
-        sizeof(struct scatterlist) * sg_elems;
+        sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
     vblk->tag_set.driver_data = vblk;
     vblk->tag_set.nr_hw_queues = vblk->num_vqs;
 
@@ -990,6 +1049,7 @@ static struct virtio_driver virtio_blk = {
     .feature_table_size     = ARRAY_SIZE(features),
     .feature_table_legacy       = features_legacy,
     .feature_table_size_legacy  = ARRAY_SIZE(features_legacy),
+    .suppress_used_validation   = true,
     .driver.name            = KBUILD_MODNAME,
     .driver.owner           = THIS_MODULE,
     .id_table           = id_table,
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index a90001e02bf7..0a7dde135db1 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -18,13 +18,20 @@ static DEFINE_IDA(rng_index_ida);
 struct virtrng_info {
     struct hwrng hwrng;
     struct virtqueue *vq;
-    struct completion have_data;
     char name[25];
-    unsigned int data_avail;
     int index;
-    bool busy;
     bool hwrng_register_done;
     bool hwrng_removed;
+    /* data transfer */
+    struct completion have_data;
+    unsigned int data_avail;
+    unsigned int data_idx;
+    /* minimal size returned by rng_buffer_size() */
+#if SMP_CACHE_BYTES < 32
+    u8 data[32];
+#else
+    u8 data[SMP_CACHE_BYTES];
+#endif
 };
 
 static void random_recv_done(struct virtqueue *vq)
@@ -35,54 +42,88 @@ static void random_recv_done(struct virtqueue *vq)
     if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
         return;
 
+    vi->data_idx = 0;
+
     complete(&vi->have_data);
 }
 
-/* The host will fill any buffer we give it with sweet, sweet randomness. */
-static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
+static void request_entropy(struct virtrng_info *vi)
 {
     struct scatterlist sg;
 
-    sg_init_one(&sg, buf, size);
+    reinit_completion(&vi->have_data);
+    vi->data_avail = 0;
+    vi->data_idx = 0;
+
+    sg_init_one(&sg, vi->data, sizeof(vi->data));
 
     /* There should always be room for one buffer. */
-    virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
+    virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL);
 
     virtqueue_kick(vi->vq);
 }
 
+static unsigned int copy_data(struct virtrng_info *vi, void *buf,
+                  unsigned int size)
+{
+    size = min_t(unsigned int, size, vi->data_avail);
+    memcpy(buf, vi->data + vi->data_idx, size);
+    vi->data_idx += size;
+    vi->data_avail -= size;
+    if (vi->data_avail == 0)
+        request_entropy(vi);
+    return size;
+}
+
 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 {
     int ret;
     struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
+    unsigned int chunk;
+    size_t read;
 
     if (vi->hwrng_removed)
         return -ENODEV;
 
-    if (!vi->busy) {
-        vi->busy = true;
-        reinit_completion(&vi->have_data);
-        register_buffer(vi, buf, size);
+    read = 0;
+
+    /* copy available data */
+    if (vi->data_avail) {
+        chunk = copy_data(vi, buf, size);
+        size -= chunk;
+        read += chunk;
     }
 
     if (!wait)
-        return 0;
-
-    ret = wait_for_completion_killable(&vi->have_data);
-    if (ret < 0)
-        return ret;
+        return read;
+
+    /* We have already copied available entropy,
+     * so either size is 0 or data_avail is 0
+     */
+    while (size != 0) {
+        /* data_avail is 0 but a request is pending */
+        ret = wait_for_completion_killable(&vi->have_data);
+        if (ret < 0)
+            return ret;
+        /* if vi->data_avail is 0, we have been interrupted
+         * by a cleanup, but the buffer stays in the queue
+         */
+        if (vi->data_avail == 0)
+            return read;
 
-    vi->busy = false;
+        chunk = copy_data(vi, buf + read, size);
+        size -= chunk;
+        read += chunk;
+    }
 
-    return vi->data_avail;
+    return read;
 }
 
 static void virtio_cleanup(struct hwrng *rng)
 {
     struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-    if (vi->busy)
-        wait_for_completion(&vi->have_data);
+    complete(&vi->have_data);
 }
 
 static int probe_common(struct virtio_device *vdev)
@@ -118,6 +159,9 @@ static int probe_common(struct virtio_device *vdev)
         goto err_find;
     }
 
+    /* we always have a pending entropy request */
+    request_entropy(vi);
+
     return 0;
 
 err_find:
@@ -133,9 +177,9 @@ static void remove_common(struct virtio_device *vdev)
     vi->hwrng_removed = true;
     vi->data_avail = 0;
+    vi->data_idx = 0;
     complete(&vi->have_data);
     vdev->config->reset(vdev);
-    vi->busy = false;
     if (vi->hwrng_register_done)
         hwrng_unregister(&vi->hwrng);
     vdev->config->del_vqs(vdev);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 7eaf303a7a86..660c5c388c29 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -28,6 +28,7 @@
 #include "../tty/hvc/hvc_console.h"
 
 #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)
+#define VIRTCONS_MAX_PORTS 0x8000
 
 /*
  * This is a global struct for storing common data for all the devices
@@ -2036,6 +2037,14 @@ static int virtcons_probe(struct virtio_device *vdev)
         virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
                  struct virtio_console_config, max_nr_ports,
                  &portdev->max_nr_ports) == 0) {
+        if (portdev->max_nr_ports == 0 ||
+            portdev->max_nr_ports > VIRTCONS_MAX_PORTS) {
+            dev_err(&vdev->dev,
+                "Invalid max_nr_ports %d",
+                portdev->max_nr_ports);
+            err = -EINVAL;
+            goto free;
+        }
         multiport = true;
     }
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
index f10a603b13fb..1ed4daa918a0 100644
--- a/drivers/i2c/busses/i2c-virtio.c
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -63,34 +63,32 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
         int outcnt = 0, incnt = 0;
 
         /*
-         * We don't support 0 length messages and so filter out
-         * 0 length transfers by using i2c_adapter_quirks.
-         */
-        if (!msgs[i].len)
-            break;
-
-        /*
          * Only 7-bit mode supported for this moment. For the address
          * format, please check the Virtio I2C Specification.
          */
         reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
 
+        if (msgs[i].flags & I2C_M_RD)
+            reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_M_RD);
+
         if (i != num - 1)
-            reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
+            reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
 
         sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr));
         sgs[outcnt++] = &out_hdr;
 
-        reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
-        if (!reqs[i].buf)
-            break;
+        if (msgs[i].len) {
+            reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
+            if (!reqs[i].buf)
+                break;
 
-        sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
+            sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
 
-        if (msgs[i].flags & I2C_M_RD)
-            sgs[outcnt + incnt++] = &msg_buf;
-        else
-            sgs[outcnt++] = &msg_buf;
+            if (msgs[i].flags & I2C_M_RD)
+                sgs[outcnt + incnt++] = &msg_buf;
+            else
+                sgs[outcnt++] = &msg_buf;
+        }
 
         sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr));
         sgs[outcnt + incnt++] = &in_hdr;
@@ -191,7 +189,7 @@ static int virtio_i2c_setup_vqs(struct virtio_i2c *vi)
 
 static u32 virtio_i2c_func(struct i2c_adapter *adap)
 {
-    return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
+    return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
 static struct i2c_algorithm virtio_algorithm = {
@@ -199,15 +197,16 @@ static struct i2c_algorithm virtio_algorithm = {
     .functionality = virtio_i2c_func,
 };
 
-static const struct i2c_adapter_quirks virtio_i2c_quirks = {
-    .flags = I2C_AQ_NO_ZERO_LEN,
-};
-
 static int virtio_i2c_probe(struct virtio_device *vdev)
 {
     struct virtio_i2c *vi;
     int ret;
 
+    if (!virtio_has_feature(vdev, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST)) {
+        dev_err(&vdev->dev, "Zero-length request feature is mandatory\n");
+        return -EINVAL;
+    }
+
     vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL);
     if (!vi)
         return -ENOMEM;
@@ -225,7 +224,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev)
     snprintf(vi->adap.name, sizeof(vi->adap.name),
          "i2c_virtio at virtio bus %d", vdev->index);
     vi->adap.algo = &virtio_algorithm;
-    vi->adap.quirks = &virtio_i2c_quirks;
     vi->adap.dev.parent = &vdev->dev;
     vi->adap.dev.of_node = vdev->dev.of_node;
     i2c_set_adapdata(&vi->adap, vi);
@@ -270,11 +268,17 @@ static int virtio_i2c_restore(struct virtio_device *vdev)
 }
 #endif
 
+static const unsigned int features[] = {
+    VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
+};
+
 static struct virtio_driver virtio_i2c_driver = {
-    .id_table   = id_table,
-    .probe      = virtio_i2c_probe,
-    .remove     = virtio_i2c_remove,
-    .driver     = {
+    .feature_table      = features,
+    .feature_table_size = ARRAY_SIZE(features),
+    .id_table       = id_table,
+    .probe          = virtio_i2c_probe,
+    .remove         = virtio_i2c_remove,
+    .driver         = {
         .name   = "i2c_virtio",
     },
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index cc79343cd220..1771d6e5224f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -408,12 +408,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
      * add_recvbuf_mergeable() + get_mergeable_buf_len()
      */
     truesize = headroom ? PAGE_SIZE : truesize;
-    tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len);
+    tailroom = truesize - headroom;
     buf = p - headroom;
 
     len -= hdr_len;
     offset += hdr_padded_len;
     p += hdr_padded_len;
+    tailroom -= hdr_padded_len + len;
 
     shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
@@ -3422,6 +3423,7 @@ static struct virtio_driver virtio_net_driver = {
     .feature_table_size = ARRAY_SIZE(features),
     .feature_table_legacy = features_legacy,
     .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
+    .suppress_used_validation = true,
     .driver.name =  KBUILD_MODNAME,
     .driver.owner = THIS_MODULE,
     .id_table = id_table,
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index b8455fcbf18b..b11b57355914 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -978,6 +978,7 @@ static unsigned int features[] = {
 static struct virtio_driver virtio_scsi_driver = {
     .feature_table = features,
     .feature_table_size = ARRAY_SIZE(features),
+    .suppress_used_validation = true,
     .driver.name = KBUILD_MODNAME,
     .driver.owner = THIS_MODULE,
     .id_table = id_table,
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 3d91982d8371..50f45d037611 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -78,4 +78,12 @@ config VP_VDPA
     help
       This kernel module bridges virtio PCI device to vDPA bus.
 
+config ALIBABA_ENI_VDPA
+    tristate "vDPA driver for Alibaba ENI"
+    select VIRTIO_PCI_LIB_LEGACY
+    depends on PCI_MSI && X86
+    help
+      VDPA driver for Alibaba ENI (Elastic Network Interface), which is
+      built upon the virtio 0.9.5 specification.
+
 endif # VDPA
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index f02ebed33f19..15665563a7f4 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_VDPA_USER) += vdpa_user/
 obj-$(CONFIG_IFCVF)    += ifcvf/
 obj-$(CONFIG_MLX5_VDPA) += mlx5/
 obj-$(CONFIG_VP_VDPA)    += virtio_pci/
+obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/
diff --git a/drivers/vdpa/alibaba/Makefile b/drivers/vdpa/alibaba/Makefile
new file mode 100644
index 000000000000..ef4aae69f87a
--- /dev/null
+++ b/drivers/vdpa/alibaba/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ALIBABA_ENI_VDPA) += eni_vdpa.o
+
diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c
new file mode 100644
index 000000000000..3f788794571a
--- /dev/null
+++ b/drivers/vdpa/alibaba/eni_vdpa.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for Alibaba ENI (Elastic Network Interface)
+ *
+ * Copyright (c) 2021, Alibaba Inc. All rights reserved.
+ * Author: Wu Zongyong <wuzongyong@linux.alibaba.com>
+ *
+ */
+
+#include "linux/bits.h"
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_pci_legacy.h>
+#include <uapi/linux/virtio_net.h>
+
+#define ENI_MSIX_NAME_SIZE 256
+
+#define ENI_ERR(pdev, fmt, ...) \
+    dev_err(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
+#define ENI_DBG(pdev, fmt, ...) \
+    dev_dbg(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
+#define ENI_INFO(pdev, fmt, ...) \
+    dev_info(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
+
+struct eni_vring {
+    void __iomem *notify;
+    char msix_name[ENI_MSIX_NAME_SIZE];
+    struct vdpa_callback cb;
+    int irq;
+};
+
+struct eni_vdpa {
+    struct vdpa_device vdpa;
+    struct virtio_pci_legacy_device ldev;
+    struct eni_vring *vring;
+    struct vdpa_callback config_cb;
+    char msix_name[ENI_MSIX_NAME_SIZE];
+    int config_irq;
+    int queues;
+    int vectors;
+};
+
+static struct eni_vdpa *vdpa_to_eni(struct vdpa_device *vdpa)
+{
+    return container_of(vdpa, struct eni_vdpa, vdpa);
+}
+
+static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+
+    return &eni_vdpa->ldev;
+}
+
+static u64 eni_vdpa_get_features(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+    u64 features = vp_legacy_get_features(ldev);
+
+    features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+    features |= BIT_ULL(VIRTIO_F_ORDER_PLATFORM);
+
+    return features;
+}
+
+static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    if (!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) && features) {
+        ENI_ERR(ldev->pci_dev,
+            "VIRTIO_NET_F_MRG_RXBUF is not negotiated\n");
+        return -EINVAL;
+    }
+
+    vp_legacy_set_features(ldev, (u32)features);
+
+    return 0;
+}
+
+static u8 eni_vdpa_get_status(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return vp_legacy_get_status(ldev);
+}
+
+static int eni_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+    int irq = eni_vdpa->vring[idx].irq;
+
+    if (irq == VIRTIO_MSI_NO_VECTOR)
+        return -EINVAL;
+
+    return irq;
+}
+
+static void eni_vdpa_free_irq(struct eni_vdpa *eni_vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    struct pci_dev *pdev = ldev->pci_dev;
+    int i;
+
+    for (i = 0; i < eni_vdpa->queues; i++) {
+        if (eni_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
+            vp_legacy_queue_vector(ldev, i, VIRTIO_MSI_NO_VECTOR);
+            devm_free_irq(&pdev->dev, eni_vdpa->vring[i].irq,
+                      &eni_vdpa->vring[i]);
+            eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+        }
+    }
+
+    if (eni_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
+        vp_legacy_config_vector(ldev, VIRTIO_MSI_NO_VECTOR);
+        devm_free_irq(&pdev->dev, eni_vdpa->config_irq, eni_vdpa);
+        eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+    }
+
+    if (eni_vdpa->vectors) {
+        pci_free_irq_vectors(pdev);
+        eni_vdpa->vectors = 0;
+    }
+}
+
+static irqreturn_t eni_vdpa_vq_handler(int irq, void *arg)
+{
+    struct eni_vring *vring = arg;
+
+    if (vring->cb.callback)
+        return vring->cb.callback(vring->cb.private);
+
+    return IRQ_HANDLED;
+}
+
+static irqreturn_t eni_vdpa_config_handler(int irq, void *arg)
+{
+    struct eni_vdpa *eni_vdpa = arg;
+
+    if (eni_vdpa->config_cb.callback)
+        return eni_vdpa->config_cb.callback(eni_vdpa->config_cb.private);
+
+    return IRQ_HANDLED;
+}
+
+static int eni_vdpa_request_irq(struct eni_vdpa *eni_vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    struct pci_dev *pdev = ldev->pci_dev;
+    int i, ret, irq;
+    int queues = eni_vdpa->queues;
+    int vectors = queues + 1;
+
+    ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
+    if (ret != vectors) {
+        ENI_ERR(pdev,
+            "failed to allocate irq vectors want %d but %d\n",
+            vectors, ret);
+        return ret;
+    }
+
+    eni_vdpa->vectors = vectors;
+
+    for (i = 0; i < queues; i++) {
+        snprintf(eni_vdpa->vring[i].msix_name, ENI_MSIX_NAME_SIZE,
+             "eni-vdpa[%s]-%d\n", pci_name(pdev), i);
+        irq = pci_irq_vector(pdev, i);
+        ret = devm_request_irq(&pdev->dev, irq,
+                       eni_vdpa_vq_handler,
+                       0, eni_vdpa->vring[i].msix_name,
+                       &eni_vdpa->vring[i]);
+        if (ret) {
+            ENI_ERR(pdev, "failed to request irq for vq %d\n", i);
+            goto err;
+        }
+        vp_legacy_queue_vector(ldev, i, i);
+        eni_vdpa->vring[i].irq = irq;
+    }
+
+    snprintf(eni_vdpa->msix_name, ENI_MSIX_NAME_SIZE, "eni-vdpa[%s]-config\n",
+         pci_name(pdev));
+    irq = pci_irq_vector(pdev, queues);
+    ret = devm_request_irq(&pdev->dev, irq, eni_vdpa_config_handler, 0,
+                   eni_vdpa->msix_name, eni_vdpa);
+    if (ret) {
+        ENI_ERR(pdev, "failed to request irq for config vq %d\n", i);
+        goto err;
+    }
+    vp_legacy_config_vector(ldev, queues);
+    eni_vdpa->config_irq = irq;
+
+    return 0;
+err:
+    eni_vdpa_free_irq(eni_vdpa);
+    return ret;
+}
+
+static void eni_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    u8 s = eni_vdpa_get_status(vdpa);
+
+    if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
+        !(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        eni_vdpa_request_irq(eni_vdpa);
+    }
+
+    vp_legacy_set_status(ldev, status);
+
+    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+        (s & VIRTIO_CONFIG_S_DRIVER_OK))
+        eni_vdpa_free_irq(eni_vdpa);
+}
+
+static int eni_vdpa_reset(struct vdpa_device *vdpa)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    u8 s = eni_vdpa_get_status(vdpa);
+
+    vp_legacy_set_status(ldev, 0);
+
+    if (s & VIRTIO_CONFIG_S_DRIVER_OK)
+        eni_vdpa_free_irq(eni_vdpa);
+
+    return 0;
+}
+
+static u16 eni_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return vp_legacy_get_queue_size(ldev, 0);
+}
+
+static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return vp_legacy_get_queue_size(ldev, 0);
+}
+
+static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
+                 struct vdpa_vq_state *state)
+{
+    return -EOPNOTSUPP;
+}
+
+static int eni_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
+                 const struct vdpa_vq_state *state)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+    const struct vdpa_vq_state_split *split = &state->split;
+
+    /* ENI is built upon the virtio-pci specification, which does not
+     * support setting the state of a virtqueue. But if the state is
+     * equal to the device's initial state by chance, we can let it go.
+     */
+    if (!vp_legacy_get_queue_enable(ldev, qid)
+        && split->avail_index == 0)
+        return 0;
+
+    return -EOPNOTSUPP;
+}
+
+
+static void eni_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
+                   struct vdpa_callback *cb)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+
+    eni_vdpa->vring[qid].cb = *cb;
+}
+
+static void eni_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 qid,
+                  bool ready)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    /* ENI is a legacy virtio-pci device. Disabling a queue is not
+     * supported by the specification, but we can disable a virtqueue
+     * by setting its address to 0.
+     */
+    if (!ready)
+        vp_legacy_set_queue_address(ldev, qid, 0);
+}
+
+static bool eni_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return vp_legacy_get_queue_enable(ldev, qid);
+}
+
+static void eni_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
+                u32 num)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+    struct pci_dev *pdev = ldev->pci_dev;
+    u16 n = vp_legacy_get_queue_size(ldev, qid);
+
+    /* ENI is a legacy virtio-pci device that does not allow changing
+     * the virtqueue size. Just report an error if someone tries to
+     * change it.
+     */
+    if (num != n)
+        ENI_ERR(pdev,
+            "not support to set vq %u fixed num %u to %u\n",
+            qid, n, num);
+}
+
+static int eni_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
+                   u64 desc_area, u64 driver_area,
+                   u64 device_area)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+    u32 pfn = desc_area >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+
+    vp_legacy_set_queue_address(ldev, qid, pfn);
+
+    return 0;
+}
+
+static void eni_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+
+    iowrite16(qid, eni_vdpa->vring[qid].notify);
+}
+
+static u32 eni_vdpa_get_device_id(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return ldev->id.device;
+}
+
+static u32 eni_vdpa_get_vendor_id(struct vdpa_device *vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
+
+    return ldev->id.vendor;
+}
+
+static u32 eni_vdpa_get_vq_align(struct vdpa_device *vdpa)
+{
+    return VIRTIO_PCI_VRING_ALIGN;
+}
+
+static size_t eni_vdpa_get_config_size(struct vdpa_device *vdpa)
+{
+    return sizeof(struct virtio_net_config);
+}
+
+
+static void eni_vdpa_get_config(struct vdpa_device *vdpa,
+                unsigned int offset,
+                void *buf, unsigned int len)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    void __iomem *ioaddr = ldev->ioaddr +
+        VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
+        offset;
+    u8 *p = buf;
+    int i;
+
+    for (i = 0; i < len; i++)
+        *p++ = ioread8(ioaddr + i);
+}
+
+static void eni_vdpa_set_config(struct vdpa_device *vdpa,
+                unsigned int offset, const void *buf,
+                unsigned int len)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    void __iomem *ioaddr = ldev->ioaddr +
+        VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
+        offset;
+    const u8 *p = buf;
+    int i;
+
+    for (i = 0; i < len; i++)
+        iowrite8(*p++, ioaddr + i);
+}
+
+static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa,
+                   struct vdpa_callback *cb)
+{
+    struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
+
+    eni_vdpa->config_cb = *cb;
+}
+
+static const struct vdpa_config_ops eni_vdpa_ops = {
+    .get_features = eni_vdpa_get_features,
+    .set_features = eni_vdpa_set_features,
+    .get_status = eni_vdpa_get_status,
+    .set_status = eni_vdpa_set_status,
+    .reset = eni_vdpa_reset,
+    .get_vq_num_max = eni_vdpa_get_vq_num_max,
+    .get_vq_num_min = eni_vdpa_get_vq_num_min,
+    .get_vq_state = eni_vdpa_get_vq_state,
+    .set_vq_state = eni_vdpa_set_vq_state,
+    .set_vq_cb = eni_vdpa_set_vq_cb,
+    .set_vq_ready = eni_vdpa_set_vq_ready,
+    .get_vq_ready = eni_vdpa_get_vq_ready,
+    .set_vq_num = eni_vdpa_set_vq_num,
+    .set_vq_address = eni_vdpa_set_vq_address,
+    .kick_vq = eni_vdpa_kick_vq,
+    .get_device_id = eni_vdpa_get_device_id,
+    .get_vendor_id = eni_vdpa_get_vendor_id,
+    .get_vq_align = eni_vdpa_get_vq_align,
+    .get_config_size = eni_vdpa_get_config_size,
+    .get_config = eni_vdpa_get_config,
+    .set_config = eni_vdpa_set_config,
+    .set_config_cb = eni_vdpa_set_config_cb,
+    .get_vq_irq = eni_vdpa_get_vq_irq,
+};
+
+
+static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa)
+{
+    struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
+    u32 features = vp_legacy_get_features(ldev);
+    u16 num = 2;
+
+    if (features & BIT_ULL(VIRTIO_NET_F_MQ)) {
+        __virtio16 max_virtqueue_pairs;
+
+        eni_vdpa_get_config(&eni_vdpa->vdpa,
+                    offsetof(struct virtio_net_config, max_virtqueue_pairs),
+                    &max_virtqueue_pairs,
+                    sizeof(max_virtqueue_pairs));
+        num = 2 * __virtio16_to_cpu(virtio_legacy_is_little_endian(),
+                        max_virtqueue_pairs);
+    }
+
+    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+        num += 1;
+
+    return num;
+}
+
+static void eni_vdpa_free_irq_vectors(void *data)
+{
+    pci_free_irq_vectors(data);
+}
+
+static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+    struct device *dev = &pdev->dev;
+    struct eni_vdpa *eni_vdpa;
+    struct virtio_pci_legacy_device *ldev;
+    int ret, i;
+
+    ret = pcim_enable_device(pdev);
+    if (ret)
+        return ret;
+
+    eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa,
+                     dev, &eni_vdpa_ops, NULL, false);
+    if (IS_ERR(eni_vdpa)) {
+        ENI_ERR(pdev, "failed to allocate vDPA structure\n");
+        return PTR_ERR(eni_vdpa);
+    }
+
+    ldev = &eni_vdpa->ldev;
+    ldev->pci_dev = pdev;
+
+    ret = vp_legacy_probe(ldev);
+    if (ret) {
+        ENI_ERR(pdev, "failed to probe legacy PCI device\n");
+        goto err;
+    }
+
+    pci_set_master(pdev);
+    pci_set_drvdata(pdev, eni_vdpa);
+
+    eni_vdpa->vdpa.dma_dev = &pdev->dev;
+    eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa);
+
+    ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev);
+    if (ret) {
+        ENI_ERR(pdev,
+            "failed for adding devres for freeing irq vectors\n");
+        goto err;
+    }
+
+    eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues,
+                       sizeof(*eni_vdpa->vring),
+                       GFP_KERNEL);
+    if (!eni_vdpa->vring) {
+        ret = -ENOMEM;
+        ENI_ERR(pdev, "failed to allocate virtqueues\n");
+        goto err;
+    }
+
+    for (i = 0; i < eni_vdpa->queues; i++) {
+        eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+        eni_vdpa->vring[i].notify = ldev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
+    }
+    eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+
+    ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues);
+    if (ret) {
+        ENI_ERR(pdev, "failed to register to vdpa bus\n");
+        goto err;
+    }
+
+    return 0;
+
+err:
+    put_device(&eni_vdpa->vdpa.dev);
+    return ret;
+}
+
+static void eni_vdpa_remove(struct pci_dev *pdev)
+{
+    struct eni_vdpa *eni_vdpa = pci_get_drvdata(pdev);
+
+    vdpa_unregister_device(&eni_vdpa->vdpa);
+    vp_legacy_remove(&eni_vdpa->ldev);
+}
+
+static struct pci_device_id eni_pci_ids[] = {
+    { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
+             VIRTIO_TRANS_ID_NET,
+             PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
+             VIRTIO_ID_NET) },
+    { 0 },
+};
+
+static struct pci_driver eni_vdpa_driver = {
+    .name       = "alibaba-eni-vdpa",
+    .id_table   = eni_pci_ids,
+    .probe      = eni_vdpa_probe,
+    .remove     = eni_vdpa_remove,
+};
+
+module_pci_driver(eni_vdpa_driver);
+
+MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba ENI vDPA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index dcd648e1f7e7..6dc75ca70b37 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev)
     return dev_type;
 }
 
-static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+                  const struct vdpa_dev_set_config *config)
 {
     struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
     struct ifcvf_adapter *adapter;
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 3163b313a470..daaf7b503677 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -63,7 +63,7 @@ struct mlx5_control_vq {
     unsigned short head;
 };
 
-struct mlx5_ctrl_wq_ent {
+struct mlx5_vdpa_wq_ent {
     struct work_struct work;
     struct mlx5_vdpa_dev *mvdev;
 };
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 5c7d2a953dbd..63813fbb5f62 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -6,6 +6,7 @@
 #include <linux/vringh.h>
 #include <uapi/linux/virtio_net.h>
 #include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/vdpa.h>
 #include <linux/virtio_config.h>
 #include <linux/auxiliary_bus.h>
 #include <linux/mlx5/cq.h>
@@ -157,10 +158,12 @@ struct mlx5_vdpa_net {
     struct mutex reslock;
     struct mlx5_flow_table *rxft;
     struct mlx5_fc *rx_counter;
-    struct mlx5_flow_handle *rx_rule;
+    struct mlx5_flow_handle *rx_rule_ucast;
+    struct mlx5_flow_handle *rx_rule_mcast;
     bool setup;
-    u16 mtu;
     u32 cur_num_vqs;
+    struct notifier_block nb;
+    struct vdpa_callback config_cb;
 };
 
 static void free_resources(struct mlx5_vdpa_net *ndev);
@@ -1381,21 +1384,33 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
     struct mlx5_flow_table_attr ft_attr = {};
     struct mlx5_flow_act flow_act = {};
     struct mlx5_flow_namespace *ns;
+    struct mlx5_flow_spec *spec;
+    void *headers_c;
+    void *headers_v;
+    u8 *dmac_c;
+    u8 *dmac_v;
     int err;
 
-    /* for now, one entry, match all, forward to tir */
-    ft_attr.max_fte = 1;
-    ft_attr.autogroup.max_num_groups = 1;
+    spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+    if (!spec)
+        return -ENOMEM;
+
+    spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+    ft_attr.max_fte = 2;
+    ft_attr.autogroup.max_num_groups = 2;
 
     ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
     if (!ns) {
-        mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
-        return -EOPNOTSUPP;
+        mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
+        err = -EOPNOTSUPP;
+        goto err_ns;
     }
 
     ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
-    if (IS_ERR(ndev->rxft))
-        return PTR_ERR(ndev->rxft);
+    if (IS_ERR(ndev->rxft)) {
+        err = PTR_ERR(ndev->rxft);
+        goto err_ns;
+    }
 
     ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
     if (IS_ERR(ndev->rx_counter)) {
@@ -1403,37 +1418,64 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
         goto err_fc;
     }
 
+    headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+    dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
+    memset(dmac_c, 0xff, ETH_ALEN);
+    headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+    dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
+    ether_addr_copy(dmac_v, ndev->config.mac);
+
     flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
     dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
     dest[0].tir_num = ndev->res.tirn;
     dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
     dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
-    ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
-    if (IS_ERR(ndev->rx_rule)) {
-        err = PTR_ERR(ndev->rx_rule);
-        ndev->rx_rule = NULL;
-        goto err_rule;
+    ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2);
+
+    if (IS_ERR(ndev->rx_rule_ucast)) {
+        err = PTR_ERR(ndev->rx_rule_ucast);
+        ndev->rx_rule_ucast = NULL;
+        goto err_rule_ucast;
     }
 
+    memset(dmac_c, 0, ETH_ALEN);
+    memset(dmac_v, 0, ETH_ALEN);
+    dmac_c[0] = 1;
+    dmac_v[0] = 1;
+    flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+    ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1);
+    if (IS_ERR(ndev->rx_rule_mcast)) {
+        err = PTR_ERR(ndev->rx_rule_mcast);
+        ndev->rx_rule_mcast = NULL;
+        goto err_rule_mcast;
+    }
+
+    kvfree(spec);
     return 0;
 
-err_rule:
+err_rule_mcast:
+    mlx5_del_flow_rules(ndev->rx_rule_ucast);
+    ndev->rx_rule_ucast = NULL;
+err_rule_ucast:
     mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
 err_fc:
     mlx5_destroy_flow_table(ndev->rxft);
+err_ns:
+    kvfree(spec);
     return err;
 }
 
 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
 {
-    if (!ndev->rx_rule)
+    if (!ndev->rx_rule_ucast)
         return;
 
-    mlx5_del_flow_rules(ndev->rx_rule);
+    mlx5_del_flow_rules(ndev->rx_rule_mcast);
+    ndev->rx_rule_mcast = NULL;
+    mlx5_del_flow_rules(ndev->rx_rule_ucast);
+    ndev->rx_rule_ucast = NULL;
     mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
     mlx5_destroy_flow_table(ndev->rxft);
-
-    ndev->rx_rule = NULL;
 }
 
 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
@@ -1557,14 +1599,14 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
 {
     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
     struct virtio_net_ctrl_hdr ctrl;
-    struct mlx5_ctrl_wq_ent *wqent;
+    struct mlx5_vdpa_wq_ent *wqent;
     struct mlx5_vdpa_dev *mvdev;
     struct mlx5_control_vq *cvq;
     struct mlx5_vdpa_net *ndev;
     size_t read, write;
     int err;
 
-    wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
+    wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
     mvdev = wqent->mvdev;
     ndev = to_mlx5_vdpa_ndev(mvdev);
     cvq = &mvdev->cvq;
@@ -1616,7 +1658,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
     struct mlx5_vdpa_virtqueue *mvq;
-    struct mlx5_ctrl_wq_ent *wqent;
+    struct mlx5_vdpa_wq_ent *wqent;
 
     if (!is_index_valid(mvdev, idx))
         return;
@@ -1852,6 +1894,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
     ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
     ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
     ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
+    ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
 
     print_features(mvdev, ndev->mvdev.mlx_features, false);
     return ndev->mvdev.mlx_features;
@@ -1942,16 +1985,16 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
         return err;
 
     ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
-    ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
-    ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
     update_cvq_info(mvdev);
     return err;
 }
 
 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
 {
-    /* not implemented */
-    mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
+    struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+    struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+
+    ndev->config_cb = *cb;
 }
 
 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
@@ -2192,7 +2235,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
     clear_vqs_ready(ndev);
     mlx5_vdpa_destroy_mr(&ndev->mvdev);
     ndev->mvdev.status = 0;
-    ndev->mvdev.mlx_features = 0;
     memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
     ndev->mvdev.actual_features = 0;
     ++mvdev->generation;
@@ -2404,7 +2446,84 @@ struct mlx5_vdpa_mgmtdev {
     struct mlx5_vdpa_net *ndev;
 };
 
-static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
+static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+    u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+    u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+    int err;
+
+    MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
+    MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+    MLX5_SET(query_vport_state_in, in, vport_number, vport);
+    if (vport)
+        MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+    err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+    if (err)
+        return 0;
+
+    return MLX5_GET(query_vport_state_out, out, state);
+}
+
+static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
+{
+    if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
+        VPORT_STATE_UP)
+        return true;
+
+    return false;
+}
+
+static void update_carrier(struct work_struct *work)
+{
+    struct mlx5_vdpa_wq_ent *wqent;
+    struct mlx5_vdpa_dev *mvdev;
+    struct mlx5_vdpa_net *ndev;
+
+    wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+    mvdev = wqent->mvdev;
+    ndev = to_mlx5_vdpa_ndev(mvdev);
+    if (get_link_state(mvdev))
+        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+    else
+        ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+    if (ndev->config_cb.callback)
+        ndev->config_cb.callback(ndev->config_cb.private);
+
+    kfree(wqent);
+}
+
+static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
+{
+    struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
+    struct mlx5_eqe *eqe = param;
+    int ret = NOTIFY_DONE;
+    struct mlx5_vdpa_wq_ent *wqent;
+
+    if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+        switch (eqe->sub_type) {
+        case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+        case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+            wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+            if (!wqent)
+                return NOTIFY_DONE;
+
+            wqent->mvdev = &ndev->mvdev;
+            INIT_WORK(&wqent->work, update_carrier);
+            queue_work(ndev->mvdev.wq, &wqent->work);
+            ret = NOTIFY_OK;
+            break;
+        default:
+            return NOTIFY_DONE;
+        }
+        return ret;
+    }
+    return ret;
+}
+
+static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
+                 const struct vdpa_dev_set_config *add_config)
 {
     struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
     struct virtio_net_config *config;
@@ -2413,6 +2532,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
     struct mlx5_vdpa_net *ndev;
     struct mlx5_core_dev *mdev;
     u32 max_vqs;
+    u16 mtu;
     int err;
 
     if (mgtdev->ndev)
@@ -2440,13 +2560,24 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
     init_mvqs(ndev);
     mutex_init(&ndev->reslock);
     config = &ndev->config;
-    err = query_mtu(mdev, &ndev->mtu);
+    err = query_mtu(mdev, &mtu);
     if (err)
         goto err_mtu;
 
-    err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
-    if (err)
-        goto err_mtu;
+    ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
+
+    if (get_link_state(mvdev))
+        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+    else
+        ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+    if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
+        memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
+    } else {
+        err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
+        if (err)
+            goto err_mtu;
+    }
 
     if (!is_zero_ether_addr(config->mac)) {
         pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
@@ -2473,12 +2604,14 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
     if (err)
         goto err_mr;
 
-    mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
+    mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
     if (!mvdev->wq) {
         err = -ENOMEM;
         goto err_res2;
     }
 
+    ndev->nb.notifier_call = event_handler;
+    mlx5_notifier_register(mdev, &ndev->nb);
     ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
     mvdev->vdev.mdev = &mgtdev->mgtdev;
     err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
@@ -2509,7 +2642,9 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
 {
     struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
     struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
+    struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 
+    mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
     destroy_workqueue(mvdev->wq);
     _vdpa_unregister_device(dev);
     mgtdev->ndev = NULL;
@@ -2541,6 +2676,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
     mgtdev->mgtdev.ops = &mdev_ops;
     mgtdev->mgtdev.device = mdev->device;
     mgtdev->mgtdev.id_table = id_table;
+    mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
     mgtdev->madev = madev;
 
     err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 1dc121a07a93..7332a74a4b00 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -14,6 +14,7 @@
 #include <uapi/linux/vdpa.h>
 #include <net/genetlink.h>
 #include <linux/mod_devicetable.h>
+#include <linux/virtio_ids.h>
 
 static LIST_HEAD(mdev_head);
 /* A global mutex that protects vdpa management device and device level operations. */
@@ -26,8 +27,16 @@ static int vdpa_dev_probe(struct device *d)
 {
     struct vdpa_device *vdev = dev_to_vdpa(d);
     struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
+    const struct vdpa_config_ops *ops = vdev->config;
+    u32 max_num, min_num = 1;
     int ret = 0;
 
+    max_num = ops->get_vq_num_max(vdev);
+    if (ops->get_vq_num_min)
+        min_num = ops->get_vq_num_min(vdev);
+    if (max_num < min_num)
+        return -EINVAL;
+
     if (drv && drv->probe)
         ret = drv->probe(vdev);
 
@@ -58,6 +67,7 @@ static void vdpa_release_dev(struct device *d)
         ops->free(vdev);
 
     ida_simple_remove(&vdpa_index_ida, vdev->index);
+    mutex_destroy(&vdev->cf_mutex);
     kfree(vdev);
 }
 
@@ -119,6 +129,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
     if (err)
         goto err_name;
 
+    mutex_init(&vdev->cf_mutex);
     device_initialize(&vdev->dev);
 
     return vdev;
@@ -289,6 +300,46 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
 }
 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
 
+/**
+ * vdpa_get_config - Get one or more device configuration fields.
+ * @vdev: vdpa device to operate on
+ * @offset: starting byte offset of the field
+ * @buf: buffer pointer to read to
+ * @len: length of the configuration fields in bytes
+ */
+void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
+             void *buf, unsigned int len)
+{
+    const struct vdpa_config_ops *ops = vdev->config;
+
+    mutex_lock(&vdev->cf_mutex);
+    /*
+     * Config accesses aren't supposed to trigger before features are set.
+     * If they do happen we assume a legacy guest.
+     */
+    if (!vdev->features_valid)
+        vdpa_set_features(vdev, 0);
+    ops->get_config(vdev, offset, buf, len);
+    mutex_unlock(&vdev->cf_mutex);
+}
+EXPORT_SYMBOL_GPL(vdpa_get_config);
+
+/**
+ * vdpa_set_config - Set one or more device configuration fields.
+ * @vdev: vdpa device to operate on
+ * @offset: starting byte offset of the field
+ * @buf: buffer pointer to read from
+ * @length: length of the configuration fields in bytes
+ */
+void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
+             const void *buf, unsigned int length)
+{
+    mutex_lock(&vdev->cf_mutex);
+    vdev->config->set_config(vdev, offset, buf, length);
+    mutex_unlock(&vdev->cf_mutex);
+}
+EXPORT_SYMBOL_GPL(vdpa_set_config);
+
 static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
                  const char *busname, const char *devname)
 {
@@ -428,9 +479,15 @@ out:
     return msg->len;
 }
 
+#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
+                 (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
+
 static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
 {
+    struct vdpa_dev_set_config config = {};
+    struct nlattr **nl_attrs = info->attrs;
     struct vdpa_mgmt_dev *mdev;
+    const u8 *macaddr;
     const char *name;
     int err = 0;
 
@@ -439,6 +496,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
 
     name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
 
+    if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
+        macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
+        memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
+        config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
+    }
+    if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
+        config.net.mtu =
+            nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
+        config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
+    }
+
+    /* Skip the capability check if the user didn't ask to configure any
+     * device networking attributes. It is likely that the user configured
+     * such attributes with a device-specific method, or relies on the
+     * device defaults.
+     */
+    if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
+        !netlink_capable(skb, CAP_NET_ADMIN))
+        return -EPERM;
+
     mutex_lock(&vdpa_dev_mutex);
     mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
     if (IS_ERR(mdev)) {
@@ -446,8 +523,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
         err = PTR_ERR(mdev);
         goto err;
     }
+    if ((config.mask & mdev->config_attr_mask) != config.mask) {
+        NL_SET_ERR_MSG_MOD(info->extack,
+                   "Some of the provided attributes are not supported");
+        err = -EOPNOTSUPP;
+        goto err;
+    }
 
-    err = mdev->ops->dev_add(mdev, name);
+    err = mdev->ops->dev_add(mdev, name, &config);
 err:
     mutex_unlock(&vdpa_dev_mutex);
     return err;
@@ -492,6 +575,7 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
           int flags, struct netlink_ext_ack *extack)
 {
     u16 max_vq_size;
+    u16 min_vq_size = 1;
     u32 device_id;
     u32 vendor_id;
     void *hdr;
@@ -508,6 +592,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
     device_id = vdev->config->get_device_id(vdev);
     vendor_id = vdev->config->get_vendor_id(vdev);
     max_vq_size = vdev->config->get_vq_num_max(vdev);
+    if (vdev->config->get_vq_num_min)
+        min_vq_size = vdev->config->get_vq_num_min(vdev);
 
     err = -EMSGSIZE;
     if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
@@ -520,6 +606,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
         goto msg_err;
     if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
         goto msg_err;
+    if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size))
+        goto msg_err;
 
     genlmsg_end(msg, hdr);
     return 0;
@@ -612,10 +700,175 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
     return msg->len;
 }
 
+static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
+                       struct sk_buff *msg, u64 features,
+                       const struct virtio_net_config *config)
+{
+    u16 val_u16;
+
+    if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
+        return 0;
+
+    val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
+    return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
+}
+
+static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
+{
+    struct virtio_net_config config = {};
+    u64 features;
+    u16 val_u16;
+
+    vdpa_get_config(vdev, 0, &config, sizeof(config));
+
+    if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
+            config.mac))
+        return -EMSGSIZE;
+
+    val_u16 = le16_to_cpu(config.status);
+    if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
+        return -EMSGSIZE;
+
+    val_u16 = le16_to_cpu(config.mtu);
+    if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
+        return -EMSGSIZE;
+
+    features = vdev->config->get_features(vdev);
+
+    return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
+}
+
+static int
+vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
+             int flags, struct netlink_ext_ack *extack)
+{
+    u32 device_id;
+    void *hdr;
+    int err;
+
+    hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
+              VDPA_CMD_DEV_CONFIG_GET);
+    if (!hdr)
+        return -EMSGSIZE;
+
+    if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
+        err = -EMSGSIZE;
+        goto msg_err;
+    }
+
+    device_id = vdev->config->get_device_id(vdev);
+    if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
+        err = -EMSGSIZE;
+        goto msg_err;
+    }
+
+    switch (device_id) {
+    case VIRTIO_ID_NET:
+        err = vdpa_dev_net_config_fill(vdev, msg);
+        break;
+    default:
+        err = -EOPNOTSUPP;
+        break;
+    }
+    if (err)
+        goto msg_err;
+
+    genlmsg_end(msg, hdr);
+    return 0;
+
+msg_err:
+    genlmsg_cancel(msg, hdr);
+    return err;
+}
+
+static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+    struct vdpa_device *vdev;
+    struct sk_buff *msg;
+    const char *devname;
+    struct device *dev;
+    int err;
+
+    if (!info->attrs[VDPA_ATTR_DEV_NAME])
+        return -EINVAL;
+    devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+    if (!msg)
+        return -ENOMEM;
+
+    mutex_lock(&vdpa_dev_mutex);
+    dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
+    if (!dev) {
+        NL_SET_ERR_MSG_MOD(info->extack, "device not found");
+        err = -ENODEV;
+        goto dev_err;
+    }
+    vdev = container_of(dev, struct vdpa_device, dev);
+    if (!vdev->mdev) {
+        NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
+        err = -EINVAL;
+        goto mdev_err;
+    }
+    err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq,
+                   0, info->extack);
+    if (!err)
+        err = genlmsg_reply(msg, info);
+
+mdev_err:
+    put_device(dev);
+dev_err:
+    mutex_unlock(&vdpa_dev_mutex);
+    if (err)
+        nlmsg_free(msg);
+    return err;
+}
+
+static int vdpa_dev_config_dump(struct device *dev, void *data)
+{
+    struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
+    struct vdpa_dev_dump_info *info = data;
+    int err;
+
+    if (!vdev->mdev)
+        return 0;
+    if (info->idx < info->start_idx) {
+        info->idx++;
+        return 0;
+    }
+    err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
+                   info->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                   info->cb->extack);
+    if (err)
+        return err;
+
+    info->idx++;
+    return 0;
+}
+
+static int
+vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+    struct vdpa_dev_dump_info info;
+
+    info.msg = msg;
+    info.cb = cb;
+    info.start_idx = cb->args[0];
+    info.idx = 0;
+
+    mutex_lock(&vdpa_dev_mutex);
+    bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump);
+    mutex_unlock(&vdpa_dev_mutex);
+    cb->args[0] = info.idx;
+    return msg->len;
+}
+
 static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
     [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
     [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
     [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
+    [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+    /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
+    [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
 };
 
 static const struct genl_ops vdpa_nl_ops[] = {
@@ -643,6 +896,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
         .doit = vdpa_nl_cmd_dev_get_doit,
         .dumpit = vdpa_nl_cmd_dev_get_dumpit,
     },
+    {
+        .cmd = VDPA_CMD_DEV_CONFIG_GET,
+        .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+        .doit = vdpa_nl_cmd_dev_config_get_doit,
+        .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
+    },
 };
 
 static struct genl_family vdpa_nl_family __ro_after_init = {
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index a790903f243e..42d401d43911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = {
     .release = vdpasim_blk_mgmtdev_release,
 };
 
-static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+                   const struct vdpa_dev_set_config *config)
 {
     struct vdpasim_dev_attr dev_attr = {};
     struct vdpasim *simdev;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index a1ab6163f7d1..76dd24abc791 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -16,6 +16,7 @@
 #include <linux/vringh.h>
 #include <linux/vdpa.h>
 #include <uapi/linux/virtio_net.h>
+#include <uapi/linux/vdpa.h>
 
 #include "vdpa_sim.h"
 
@@ -29,12 +30,6 @@
 
 #define VDPASIM_NET_VQ_NUM 2
 
-static char *macaddr;
-module_param(macaddr, charp, 0);
-MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
-
-static u8 macaddr_buf[ETH_ALEN];
-
 static void vdpasim_net_work(struct work_struct *work)
 {
     struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
@@ -112,9 +107,21 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
 {
     struct virtio_net_config *net_config = config;
 
-    net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
     net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
-    memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
+}
+
+static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
+                     const struct vdpa_dev_set_config *config)
+{
+    struct virtio_net_config *vio_config = vdpasim->config;
+
+    if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
+        memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
+    if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
+        vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
+    else
+        /* Set the default MTU to 1500 */
+        vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
 }
 
 static void vdpasim_net_mgmtdev_release(struct device *dev)
@@ -126,7 +133,8 @@ static struct device vdpasim_net_mgmtdev = {
     .release = vdpasim_net_mgmtdev_release,
 };
 
-static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+                   const struct vdpa_dev_set_config *config)
 {
     struct vdpasim_dev_attr dev_attr = {};
     struct vdpasim *simdev;
@@ -146,6 +154,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
     if (IS_ERR(simdev))
         return PTR_ERR(simdev);
 
+    vdpasim_net_setup_config(simdev, config);
+
     ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
     if (ret)
         goto reg_err;
@@ -179,20 +189,14 @@ static struct vdpa_mgmt_dev mgmt_dev = {
     .device = &vdpasim_net_mgmtdev,
     .id_table = id_table,
     .ops = &vdpasim_net_mgmtdev_ops,
+    .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
+                 1 << VDPA_ATTR_DEV_NET_CFG_MTU),
 };
 
 static int __init vdpasim_net_init(void)
 {
     int ret;
 
-    if (macaddr) {
-        mac_pton(macaddr, macaddr_buf);
-        if (!is_valid_ether_addr(macaddr_buf))
-            return -EADDRNOTAVAIL;
-    } else {
-        eth_random_addr(macaddr_buf);
-    }
-
     ret = device_register(&vdpasim_net_mgmtdev);
     if (ret)
         return ret;
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 841667a896dd..c9204c62f339 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
     return 0;
 }
 
-static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+            const struct vdpa_dev_set_config *config)
 {
     struct vduse_dev *dev;
     int ret;
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index 5bcd00246d2e..e3ff7875e123 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -76,6 +76,17 @@ static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
     return vp_modern_get_status(mdev);
 }
 
+static int vp_vdpa_get_vq_irq(struct vdpa_device
*vdpa, u16 idx) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + int irq = vp_vdpa->vring[idx].irq; + + if (irq == VIRTIO_MSI_NO_VECTOR) + return -EINVAL; + + return irq; +} + static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) { struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; @@ -427,6 +438,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = { .get_config = vp_vdpa_get_config, .set_config = vp_vdpa_set_config, .set_config_cb = vp_vdpa_set_config_cb, + .get_vq_irq = vp_vdpa_get_vq_irq, }; static void vp_vdpa_free_irq_vectors(void *data) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 39039e046117..01c59ce7e250 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, if (IS_ERR(buf)) return PTR_ERR(buf); - ops->set_config(vdpa, config.off, buf, config.len); + vdpa_set_config(vdpa, config.off, buf, config.len); kvfree(buf); return 0; diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index ce1b3f6ec325..8fcf94cd2c96 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB PCI device with possible vendor specific extensions. Any module that selects this module must depend on PCI. +config VIRTIO_PCI_LIB_LEGACY + tristate + help + Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device) + implementation. + This module implements the basic probe and control for devices + which are based on a legacy PCI device. Any module that selects this + module must depend on PCI. + menuconfig VIRTIO_MENU bool "Virtio drivers" default y @@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY bool "Support for legacy virtio draft 0.9.X and older devices" default y depends on VIRTIO_PCI + select VIRTIO_PCI_LIB_LEGACY help Virtio PCI Card 0.9.X Draft (circa 2014) and older device support. diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 699bbea0465f..0a82d0873248 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o +obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b35bb2d57f62..fdbde1db5ec5 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,17 +24,46 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev) +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) + if (vp_dev->intx_enabled) { + /* + * The below synchronize_irq() guarantees that any + * interrupt handler for this line that starts after + * synchronize_irq() has completed will see + * intx_soft_enabled == false.
+ */ + WRITE_ONCE(vp_dev->intx_soft_enabled, false); synchronize_irq(vp_dev->pci_dev->irq); + } for (i = 0; i < vp_dev->msix_vectors; ++i) - synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); + disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); +} + +/* enable irq handlers */ +void vp_enable_cbs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + disable_irq(vp_dev->pci_dev->irq); + /* + * The above disable_irq() provides TSO ordering and + * as such promotes the below store to store-release. + */ + WRITE_ONCE(vp_dev->intx_soft_enabled, true); + enable_irq(vp_dev->pci_dev->irq); + return; + } + + for (i = 0; i < vp_dev->msix_vectors; ++i) + enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -84,6 +113,9 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; + if (!READ_ONCE(vp_dev->intx_soft_enabled)) + return IRQ_NONE; + /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); @@ -141,7 +173,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], + vp_config_changed, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -160,7 +193,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_vring_interrupt, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -337,7 +371,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, + vring_interrupt, IRQF_NO_AUTOEN, vp_dev->msix_names[msix_vec], vqs[i]); if (err) @@ -549,6 +583,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, pci_set_master(pci_dev); + vp_dev->is_legacy = vp_dev->ldev.ioaddr ? 
true : false; + rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) @@ -557,10 +593,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, return 0; err_register: - if (vp_dev->ioaddr) - virtio_pci_legacy_remove(vp_dev); + if (vp_dev->is_legacy) + virtio_pci_legacy_remove(vp_dev); else - virtio_pci_modern_remove(vp_dev); + virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: @@ -587,7 +623,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) unregister_virtio_device(&vp_dev->vdev); - if (vp_dev->ioaddr) + if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index beec047a8f8d..23f6c5c678d5 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -25,6 +25,7 @@ #include <linux/virtio_config.h> #include <linux/virtio_ring.h> #include <linux/virtio_pci.h> +#include <linux/virtio_pci_legacy.h> #include <linux/virtio_pci_modern.h> #include <linux/highmem.h> #include <linux/spinlock.h> @@ -44,16 +45,14 @@ struct virtio_pci_vq_info { struct virtio_pci_device { struct virtio_device vdev; struct pci_dev *pci_dev; + struct virtio_pci_legacy_device ldev; struct virtio_pci_modern_device mdev; - /* In legacy mode, these two point to within ->legacy. */ + bool is_legacy; + /* Where to read and clear interrupt */ u8 __iomem *isr; - /* Legacy only field */ - /* the IO mapping for the PCI config space */ - void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; @@ -64,6 +63,7 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; + bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ @@ -102,8 +102,10 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev); +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev); +/* enable irq handlers */ +void vp_enable_cbs(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index d62e9835aeec..b3f8128b7983 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -14,6 +14,7 @@ * Michael S. Tsirkin <mst@redhat.com> */ +#include "linux/virtio_pci_legacy.h" #include "virtio_pci_common.h" /* virtio config->get_features() implementation */ @@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev) /* When someone needs more than 32 feature bits, we'll need to * steal a bit to indicate that the rest are somewhere else. */ - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); + return vp_legacy_get_features(&vp_dev->ldev); } /* virtio config->finalize_features() implementation */ @@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev) BUG_ON((u32)vdev->features != vdev->features); /* We only support 32 feature bits. 
*/ - iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + vp_legacy_set_features(&vp_dev->ldev, vdev->features); return 0; } @@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; u8 *ptr = buf; @@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; const u8 *ptr = buf; @@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, static u8 vp_get_status(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + return vp_legacy_get_status(&vp_dev->ldev); } static void vp_set_status(struct virtio_device *vdev, u8 status) @@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status) struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* We should never be setting status to 0. */ BUG_ON(status == 0); - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, status); } static void vp_reset(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* 0 status means a reset. */ - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, 0); /* Flush out the status write, and flush in device writes, * including MSI-X interrupts, if any. */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + vp_legacy_get_status(&vp_dev->ldev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) { - /* Setup the vector used for configuration events */ - iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - /* Will also flush the write out to device */ - return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + return vp_legacy_config_vector(&vp_dev->ldev, vector); } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, @@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, int err; u64 q_pfn; - /* Select the queue we're interested in */ - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - /* Check if queue is either not available or already active.
*/ - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + num = vp_legacy_get_queue_size(&vp_dev->ldev, index); + if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) return ERR_PTR(-ENOENT); info->msix_vector = msix_vec; @@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } /* activate the queue */ - iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn); - vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; + vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; if (msix_vec != VIRTIO_MSI_NO_VECTOR) { - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec); if (msix_vec == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto out_deactivate; @@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return vq; out_deactivate: - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, 0); out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); @@ -178,22 +171,21 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vp_legacy_queue_vector(&vp_dev->ldev, vq->index, + VIRTIO_MSI_NO_VECTOR); /* Flush the write out to device */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR); } /* Select and deactivate the queue */ - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0); vring_del_virtqueue(vq); } static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, @@ -211,51 +203,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = { /* the PCI probing function */ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) { + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; struct pci_dev *pci_dev = vp_dev->pci_dev; int rc; - /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ - if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) - return -ENODEV; - - if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", - VIRTIO_PCI_ABI_VERSION, pci_dev->revision); - return -ENODEV; - } - - rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); - if (rc) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - } else { - /* - * The virtio ring base address is expressed as a 32-bit PFN, - * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. - */ - dma_set_coherent_mask(&pci_dev->dev, - DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); - } - - if (rc) - dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. 
Trying to continue, but this might not work.\n"); + ldev->pci_dev = pci_dev; - rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + rc = vp_legacy_probe(ldev); if (rc) return rc; - rc = -ENOMEM; - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (!vp_dev->ioaddr) - goto err_iomap; - - vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; - - /* we use the subsystem vendor/device id as the virtio vendor/device - * id. this allows us to use the same PCI vendor/device id for all - * virtio devices and to identify the particular virtio driver by - * the subsystem ids */ - vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; - vp_dev->vdev.id.device = pci_dev->subsystem_device; + vp_dev->isr = ldev->isr; + vp_dev->vdev.id = ldev->id; vp_dev->vdev.config = &virtio_pci_config_ops; @@ -264,16 +223,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->del_vq = del_vq; return 0; - -err_iomap: - pci_release_region(pci_dev, 0); - return rc; } void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) { - struct pci_dev *pci_dev = vp_dev->pci_dev; + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_region(pci_dev, 0); + vp_legacy_remove(ldev); } diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c new file mode 100644 index 000000000000..9b97680dd02b --- /dev/null +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "linux/virtio_pci.h" +#include <linux/virtio_pci_legacy.h> +#include <linux/module.h> +#include <linux/pci.h> + + +/* + * vp_legacy_probe - probe the legacy virtio pci device. Note that the + * caller is required to enable the PCI device before calling this function. + * @ldev: the legacy virtio-pci device + * + * Returns 0 on success, or a negative errno on failure + */ +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + int rc; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) + return -ENODEV; + + rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); + if (rc) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + } else { + /* + * The virtio ring base address is expressed as a 32-bit PFN, + * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. + */ + dma_set_coherent_mask(&pci_dev->dev, + DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); + } + + if (rc) + dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.
Trying to continue, but this might not work.\n"); + + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + if (rc) + return rc; + + rc = -ENOMEM; + ldev->ioaddr = pci_iomap(pci_dev, 0, 0); + if (!ldev->ioaddr) + goto err_iomap; + + ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR; + + ldev->id.vendor = pci_dev->subsystem_vendor; + ldev->id.device = pci_dev->subsystem_device; + + return 0; +err_iomap: + pci_release_region(pci_dev, 0); + return rc; +} +EXPORT_SYMBOL_GPL(vp_legacy_probe); + +/* + * vp_legacy_remove - remove and clean up the legacy virtio pci device + * @ldev: the legacy virtio-pci device + */ +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + + pci_iounmap(pci_dev, ldev->ioaddr); + pci_release_region(pci_dev, 0); +} +EXPORT_SYMBOL_GPL(vp_legacy_remove); + +/* + * vp_legacy_get_features - get features from device + * @ldev: the legacy virtio-pci device + * + * Returns the features read from the device + */ +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev) +{ + + return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_features); + +/* + * vp_legacy_get_driver_features - get driver features from device + * @ldev: the legacy virtio-pci device + * + * Returns the driver features read from the device + */ +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev) +{ + return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features); + +/* + * vp_legacy_set_features - set features to device + * @ldev: the legacy virtio-pci device + * @features: the features set to device + */ +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features) +{ + iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_features); + +/* + * vp_legacy_get_status - get the device status + * @ldev: the legacy virtio-pci device + * + * Returns the status read from device + */ +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev) +{ + return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_status); + +/* + * vp_legacy_set_status - set status to device + * @ldev: the legacy virtio-pci device + * @status: the status set to device + */ +void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status) +{ + iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_status); + +/* + * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue + * @ldev: the legacy virtio-pci device + * @index: queue index + * @vector: the MSI-X vector to use for this virtqueue + * + * Returns the vector read back from the device + */ +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 index, u16 vector) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_queue_vector); + +/* + * vp_legacy_config_vector - set the vector for config interrupt + * @ldev: the legacy virtio-pci device + * @vector: the config vector + * + * Returns the config vector read from the device + */ +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector) +{ + /* Setup the vector used for configuration events */ + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + /*
Will also flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_config_vector); + +/* + * vp_legacy_set_queue_address - set the virtqueue address + * @ldev: the legacy virtio-pci device + * @index: the queue index + * @queue_pfn: pfn of the virtqueue + */ +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address); + +/* + * vp_legacy_get_queue_enable - query whether a virtqueue is enabled + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns whether a virtqueue is enabled or not + */ +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable); + +/* + * vp_legacy_get_queue_size - get size for a virtqueue + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns the size of the virtqueue + */ +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size); + +MODULE_VERSION("0.1"); +MODULE_DESCRIPTION("Legacy Virtio PCI Device"); +MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 30654d3a0b41..5455bc041fb6 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,6 +380,7 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { + .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -397,6 +398,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 3035bb6f5458..00f64f2f8b72 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -14,6 +14,9 @@ #include <linux/spinlock.h> #include <xen/xen.h> +static bool force_used_validation = false; +module_param(force_used_validation, bool, 0444); + #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -79,8 +82,8 @@ struct vring_desc_state_packed { }; struct vring_desc_extra { - dma_addr_t addr; /* Buffer DMA addr. */ - u32 len; /* Buffer length. */ + dma_addr_t addr; /* Descriptor DMA addr. */ + u32 len; /* Descriptor length. */ u16 flags; /* Descriptor flags. */ u16 next; /* The next desc state in a list.
*/ }; @@ -182,6 +185,9 @@ struct vring_virtqueue { } packed; }; + /* Per-descriptor in buffer length */ + u32 *buflen; + /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; + u32 buflen = 0; START_USE(vq); @@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); + buflen += sg->length; } } /* Last one doesn't continue. */ @@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->split.desc_state[head].indir_desc = ctx; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[head] = buflen; + /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[i])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[i]); + return NULL; + } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; @@ -1050,21 +1067,24 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, } static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - gfp_t gfp) + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) { struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; + u32 buflen = 0; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); + if (!desc) + return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); @@ -1089,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; + if (n >= out_sgs) + buflen += sg->length; } } @@ -1142,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -1176,6 +1202,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; + int err; + u32 buflen = 0; START_USE(vq); @@ -1191,9 +1219,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) - return virtqueue_add_indirect_packed(vq, sgs, total_sg, - out_sgs, in_sgs, data, gfp); + if (virtqueue_use_indirect(_vq, total_sg)) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, + in_sgs, data, gfp); + if (err != -ENOMEM) + return err; + + /* fall back on direct */ + } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; @@ -1250,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, 1 << 
VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } + if (n >= out_sgs) + buflen += sg->length; } } @@ -1269,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising @@ -1455,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[id])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[id]); + return NULL; + } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; @@ -1660,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed( struct vring_virtqueue *vq; struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; @@ -1749,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed( if (!vq->packed.desc_extra) goto err_desc_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* No callback? Tell other side not to bother us. */ if (!callback) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -1761,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed( spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->packed.desc_extra); err_desc_extra: kfree(vq->packed.desc_state); err_desc_state: @@ -2168,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *), const char *name) { + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); struct vring_virtqueue *vq; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) @@ -2227,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, if (!vq->split.desc_extra) goto err_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* Put everything in free lists. 
*/ vq->free_head = 0; memset(vq->split.desc_state, 0, vring.num * @@ -2237,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->split.desc_extra); err_extra: kfree(vq->split.desc_state); err_state: diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 72eaef2caeb1..f85f860bc10b 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - ops->set_config(vdpa, offset, buf, len); + vdpa_set_config(vdpa, offset, buf, len); } static u32 virtio_vdpa_generation(struct virtio_device *vdev) @@ -145,7 +144,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; unsigned long flags; - u32 align, num; + u32 align, max_num, min_num = 1; + bool may_reduce_num = true; int err; if (!name) @@ -163,16 +163,21 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!info) return ERR_PTR(-ENOMEM); - num = ops->get_vq_num_max(vdpa); - if (num == 0) { + max_num = ops->get_vq_num_max(vdpa); + if (max_num == 0) { err = -ENOENT; goto error_new_virtqueue; } + if (ops->get_vq_num_min) + min_num = ops->get_vq_num_min(vdpa); + + may_reduce_num = (max_num == min_num) ? false : true; + /* Create the vring */ align = ops->get_vq_align(vdpa); - vq = vring_create_virtqueue(index, num, align, vdev, - true, true, ctx, + vq = vring_create_virtqueue(index, max_num, align, vdev, + true, may_reduce_num, ctx, virtio_vdpa_notify, callback, name); if (!vq) { err = -ENOMEM; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 3972ab765de1..c3011ccda430 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -6,6 +6,8 @@ #include <linux/device.h> #include <linux/interrupt.h> #include <linux/vhost_iotlb.h> +#include <linux/virtio_net.h> +#include <linux/if_ether.h> /** * struct vdpa_callback - vDPA callback definition. @@ -63,6 +65,7 @@ struct vdpa_mgmt_dev; * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. + * @cf_mutex: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized?
for legacy guests * @use_va: indicate whether virtual address must be used by this device @@ -74,6 +77,7 @@ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; + struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; bool features_valid; bool use_va; @@ -91,6 +95,14 @@ struct vdpa_iova_range { u64 last; }; +struct vdpa_dev_set_config { + struct { + u8 mac[ETH_ALEN]; + u16 mtu; + } net; + u64 mask; +}; + /** * Corresponding file area for device memory mapping * @file: vma->vm_file for the mapping @@ -171,6 +183,9 @@ struct vdpa_map_file { * @get_vq_num_max: Get the max size of virtqueue * @vdev: vdpa device * Returns u16: max size of virtqueue + * @get_vq_num_min: Get the min size of virtqueue (optional) + * @vdev: vdpa device + * Returns u16: min size of virtqueue * @get_device_id: Get virtio device id * @vdev: vdpa device * Returns u32: virtio device id @@ -257,7 +272,7 @@ struct vdpa_config_ops { struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ - int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); + int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); @@ -266,6 +281,7 @@ struct vdpa_config_ops { void (*set_config_cb)(struct vdpa_device *vdev, struct vdpa_callback *cb); u16 (*get_vq_num_max)(struct vdpa_device *vdev); + u16 (*get_vq_num_min)(struct vdpa_device *vdev); u32 (*get_device_id)(struct vdpa_device *vdev); u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); @@ -382,26 +398,16 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) return ops->set_features(vdev, features); } -static inline void vdpa_get_config(struct vdpa_device *vdev, - unsigned int offset, void *buf, - unsigned int len) -{ - const struct vdpa_config_ops *ops = vdev->config; - - /* - * Config accesses aren't supposed to trigger before features are set. - * If it does happen we assume a legacy guest. - */ - if (!vdev->features_valid) - vdpa_set_features(vdev, 0); - ops->get_config(vdev, offset, buf, len); -} - +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len); +void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, + const void *buf, unsigned int length); /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register * @mdev: parent device to use for device addition * @name: name of the new vdpa device + * @config: config attributes to apply to the device under creation * The driver needs to add a new device using _vdpa_register_device() * after fully initializing the vdpa device. The driver must return 0 * on success or an appropriate error code. @@ -412,14 +418,25 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, * _vdpa_unregister_device().
*/ struct vdpa_mgmtdev_ops { - int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); + int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; +/** + * struct vdpa_mgmt_dev - vdpa management device + * @device: Management parent device + * @ops: operations supported by management device + * @id_table: Pointer to device id table of supported ids + * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that the + * management device supports during the dev_add callback + * @list: list entry + */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; - const struct virtio_device_id *id_table; /* supported ids */ + const struct virtio_device_id *id_table; + u64 config_attr_mask; struct list_head list; }; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 41edbc01ffa4..44d0e09da2d9 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); * @feature_table_size: number of entries in the feature table array. * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. + * @suppress_used_validation: set to have the core skip validating the used length * @probe: the function to call when a device is found. Returns 0 or -errno. * @scan: optional function to call after successful probe; intended * for virtio-scsi to invoke a scan. @@ -168,6 +169,7 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; + bool suppress_used_validation; int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8519b3ae5d52..4d107ad31149 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -23,6 +23,8 @@ struct virtio_shm_region { * any of @get/@set, @get_status/@set_status, or @get_features/ * @finalize_features are NOT safe to be called from an atomic * context.
+ * @enable_cbs: enable the callbacks + * vdev: the virtio_device * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -75,6 +77,7 @@ struct virtio_shm_region { */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { + void (*enable_cbs)(struct virtio_device *vdev); void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, @@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); + if (dev->config->enable_cbs) + dev->config->enable_cbs(dev); + BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h new file mode 100644 index 000000000000..e5d665faf00e --- /dev/null +++ b/include/linux/virtio_pci_legacy.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_LEGACY_H +#define _LINUX_VIRTIO_PCI_LEGACY_H + +#include "linux/mod_devicetable.h" +#include <linux/pci.h> +#include <linux/virtio_pci.h> + +struct virtio_pci_legacy_device { + struct pci_dev *pci_dev; + + /* Where to read and clear interrupt */ + u8 __iomem *isr; + /* The IO mapping for the PCI config space (legacy mode only) */ + void __iomem *ioaddr; + + struct virtio_device_id id; +}; + +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev); +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev); +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features); +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev); +void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status); +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 vector); +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector); +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn); +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 idx); +void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 size); +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx); +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev); + +#endif diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 66a41e4ec163..a252f06f9dfd 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -17,6 +17,7 @@ enum vdpa_command { VDPA_CMD_DEV_NEW, VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ + VDPA_CMD_DEV_CONFIG_GET, /* can dump */ }; enum vdpa_attr { @@ -32,6 +33,12 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ VDPA_ATTR_DEV_MAX_VQS, /* u32 */ VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ + + VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */ + VDPA_ATTR_DEV_NET_STATUS, /* u8 */ + VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h index 7c6a6fc01ad6..acf3b6069136 100644 --- a/include/uapi/linux/virtio_i2c.h +++ b/include/uapi/linux/virtio_i2c.h @@ -11,9 +11,15 @@ #include <linux/const.h> #include <linux/types.h> +/* Virtio I2C Feature bits */ 
+#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 + /* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ #define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) +/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ +#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) + /** * struct virtio_i2c_out_hdr - the virtio I2C message OUT header * @addr: the controlled device address
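
The extended dev_add() contract in the vdpa changes above is easiest to follow end to end with one concrete management device. The following is a minimal sketch, not code from this series: a hypothetical "foo" driver (all foo_* names are illustrative). Note that vdpa_nl_cmd_dev_add_set_doit() has already rejected, with -EOPNOTSUPP, any bit in config->mask that the driver did not advertise in config_attr_mask, so the callback only has to copy what it accepts.

/* Hedged sketch of a management device using the extended dev_add()
 * signature; foo_* names are hypothetical. */
#include <linux/bits.h>
#include <linux/etherdevice.h>
#include <linux/vdpa.h>
#include <uapi/linux/vdpa.h>

static int foo_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
		       const struct vdpa_dev_set_config *config)
{
	u8 mac[ETH_ALEN];
	u16 mtu = 1500;	/* default when userspace sets nothing */

	/* The core pre-validated config->mask against config_attr_mask. */
	if (config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR))
		memcpy(mac, config->net.mac, ETH_ALEN);
	else
		eth_random_addr(mac);

	if (config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU))
		mtu = config->net.mtu;	/* netlink policy enforced mtu >= 68 */

	/* ... allocate the device, program mac/mtu into its
	 * virtio_net_config, then _vdpa_register_device(), as
	 * vdpa_sim_net does above ... */
	return 0;
}

static void foo_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops foo_mgmtdev_ops = {
	.dev_add = foo_dev_add,
	.dev_del = foo_dev_del,
};

static struct vdpa_mgmt_dev foo_mgmt_dev = {
	.ops = &foo_mgmtdev_ops,
	/* Advertise exactly the attributes foo_dev_add() understands. */
	.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
			    BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU),
};

With an iproute2 build that understands the new attributes, the whole path can then be exercised from userspace along the lines of "vdpa dev add name vdpa0 mgmtdev vdpasim_net mac 00:11:22:33:44:55 mtu 9000" followed by "vdpa dev config show vdpa0", the latter hitting the VDPA_CMD_DEV_CONFIG_GET command added above (exact CLI syntax depends on the iproute2 version).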
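
The new virtio_pci_legacy_dev.c library deliberately avoids any reference to struct virtio_pci_device so that transports other than virtio-pci can reuse it. A second consumer might look roughly like the sketch below: a hypothetical "bar" driver, assuming the PCI device has already been enabled, which vp_legacy_probe() requires of its caller.

/* Hedged sketch of a second consumer of the vp_legacy_* library;
 * bar_* names are hypothetical. */
#include <linux/pci.h>
#include <linux/virtio_pci_legacy.h>

struct bar_dev {
	struct virtio_pci_legacy_device ldev;
};

static int bar_setup(struct bar_dev *bar, struct pci_dev *pdev)
{
	u16 num;
	int rc;

	bar->ldev.pci_dev = pdev;
	rc = vp_legacy_probe(&bar->ldev);
	if (rc)
		return rc;

	vp_legacy_set_status(&bar->ldev, 0);	/* 0 status means reset */

	num = vp_legacy_get_queue_size(&bar->ldev, 0);
	if (!num || vp_legacy_get_queue_enable(&bar->ldev, 0)) {
		/* queue absent or already activated by someone else */
		vp_legacy_remove(&bar->ldev);
		return -ENOENT;
	}

	/* ... allocate a vring of "num" entries and hand its PFN to the
	 * device with vp_legacy_set_queue_address(), as setup_vq() in
	 * virtio_pci_legacy.c now does ... */
	return 0;
}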
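
The interrupt hardening in virtio_pci_common.c applies one idea in two forms: MSI-X vectors are requested with IRQF_NO_AUTOEN and only enabled from virtio_device_ready() via the new .enable_cbs hook, while the INTX line, which may be shared and therefore cannot simply stay disabled, is gated in software. A reduced sketch of that INTX gate, again with hypothetical foo_* names:

/* Soft-enable gate for a shared interrupt line, modeled on
 * vp_interrupt()/vp_disable_cbs(); foo_* names are hypothetical. */
#include <linux/compiler.h>
#include <linux/interrupt.h>

struct foo_dev {
	int irq;
	bool irq_soft_enabled;
};

static irqreturn_t foo_interrupt(int irq, void *opaque)
{
	struct foo_dev *fdev = opaque;

	/* A shared line can fire before the device is ready or after a
	 * reset has begun; refuse to touch device state in that case. */
	if (!READ_ONCE(fdev->irq_soft_enabled))
		return IRQ_NONE;

	/* ... read and clear the ISR, dispatch callbacks ... */
	return IRQ_HANDLED;
}

static void foo_disable_cbs(struct foo_dev *fdev)
{
	WRITE_ONCE(fdev->irq_soft_enabled, false);
	/* Handlers already running may still see the old value; once
	 * synchronize_irq() returns, every later invocation sees false. */
	synchronize_irq(fdev->irq);
}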
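
Finally, the used-length validation added to virtio_ring.c is opt-out: the core allocates the per-descriptor buflen array and checks every completion unless the driver sets the new suppress_used_validation flag (virtio-blk, virtio-net and virtio-scsi do in this series, since they parse the used length themselves), while the force_used_validation module parameter re-enables the check even for those drivers. A sketch of the opt-out, with hypothetical foo_* symbols:

/* Hedged sketch of a driver opting out of core used-length checks. */
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>

static const struct virtio_device_id foo_id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },	/* hypothetical binding */
	{ 0 },
};

static int foo_probe(struct virtio_device *vdev)
{
	return 0;	/* find vqs, start the device */
}

static void foo_remove(struct virtio_device *vdev)
{
}

static struct virtio_driver foo_driver = {
	.driver.name	= KBUILD_MODNAME,
	.driver.owner	= THIS_MODULE,
	/* This driver validates used lengths itself, so skip the
	 * core-side bookkeeping unless force_used_validation is set
	 * on the virtio_ring module. */
	.suppress_used_validation = true,
	.id_table	= foo_id_table,
	.probe		= foo_probe,
	.remove		= foo_remove,
};
module_virtio_driver(foo_driver);
MODULE_LICENSE("GPL");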