42 files changed, 2767 insertions, 348 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst new file mode 100644 index 000000000000..587927d3de92 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst @@ -0,0 +1,85 @@ +.. SPDX-License-Identifier: GPL-2.0+ +.. note: can be edited and viewed with /usr/bin/formiko-vim + +========================================================== +PCI vDPA driver for the AMD/Pensando(R) DSC adapter family +========================================================== + +AMD/Pensando vDPA VF Device Driver + +Copyright(c) 2023 Advanced Micro Devices, Inc + +Overview +======== + +The ``pds_vdpa`` driver is an auxiliary bus driver that supplies +a vDPA device for use by the virtio network stack. It is used with +the Pensando Virtual Function devices that offer vDPA and virtio queue +services. It depends on the ``pds_core`` driver and hardware for the PF +and VF PCI handling as well as for device configuration services. + +Using the device +================ + +The ``pds_vdpa`` device is enabled via multiple configuration steps and +depends on the ``pds_core`` driver to create and enable SR-IOV Virtual +Function devices. After the VFs are enabled, we enable the vDPA service +in the ``pds_core`` device to create the auxiliary devices used by pds_vdpa. + +Example steps: + +.. code-block:: bash + + #!/bin/bash + + modprobe pds_core + modprobe vdpa + modprobe pds_vdpa + + PF_BDF=`ls /sys/module/pds_core/drivers/pci\:pds_core/*/sriov_numvfs | awk -F / '{print $7}'` + + # Enable vDPA VF auxiliary device(s) in the PF + devlink dev param set pci/$PF_BDF name enable_vnet cmode runtime value true + + # Create a VF for vDPA use + echo 1 > /sys/bus/pci/drivers/pds_core/$PF_BDF/sriov_numvfs + + # Find the vDPA services/devices available + PDS_VDPA_MGMT=`vdpa mgmtdev show | grep vDPA | head -1 | cut -d: -f1` + + # Create a vDPA device for use in virtio network configurations + vdpa dev add name vdpa1 mgmtdev $PDS_VDPA_MGMT mac 00:11:22:33:44:55 + + # Set up an ethernet interface on the vdpa device + modprobe virtio_vdpa + + + +Enabling the driver +=================== + +The driver is enabled via the standard kernel configuration system, +using the make command:: + + make oldconfig/menuconfig/etc. 
+ +The driver is located in the menu structure at: + + -> Device Drivers + -> Network device support (NETDEVICES [=y]) + -> Ethernet driver support + -> Pensando devices + -> Pensando Ethernet PDS_VDPA Support + +Support +======= + +For general Linux networking support, please use the netdev mailing +list, which is monitored by Pensando personnel:: + + netdev@vger.kernel.org + +For more specific support needs, please use the Pensando driver support +email:: + + drivers@pensando.io diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 417ca514a4d0..94ecb67c0885 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -15,6 +15,7 @@ Contents: amazon/ena altera/altera_tse amd/pds_core + amd/pds_vdpa aquantia/atlantic chelsio/cxgb cirrus/cs89x0 diff --git a/MAINTAINERS b/MAINTAINERS index d7d65163e54e..1367a240cb0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22535,6 +22535,10 @@ F: include/linux/vringh.h F: include/uapi/linux/virtio_*.h F: tools/virtio/ +PDS DSC VIRTIO DATA PATH ACCELERATOR +R: Shannon Nelson <shannon.nelson@amd.com> +F: drivers/vdpa/pds/ + VIRTIO CRYPTO DRIVER M: Gonglei <arei.gonglei@huawei.com> L: virtualization@lists.linux-foundation.org diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index c570c45d1480..2ac70b560c46 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -79,6 +79,7 @@ static int virtbt_close_vdev(struct virtio_bluetooth *vbt) while ((skb = virtqueue_detach_unused_buf(vq))) kfree_skb(skb); + cond_resched(); } return 0; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 1f8da0a71ce9..680d1ef2a217 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1936,6 +1936,7 @@ static void remove_vqs(struct ports_device *portdev) flush_bufs(vq, true); while ((buf = virtqueue_detach_unused_buf(vq))) free_buf(buf, true); + cond_resched(); } portdev->vdev->config->del_vqs(portdev->vdev); kfree(portdev->in_vqs); diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 1198bd306365..94849fa3bd74 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -480,6 +480,7 @@ static void virtcrypto_free_unused_reqs(struct virtio_crypto *vcrypto) kfree(vc_req->req_data); kfree(vc_req->sgs); } + cond_resched(); } } diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index cd6ad92f3f05..656c1cb541de 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -116,4 +116,14 @@ config ALIBABA_ENI_VDPA This driver includes a HW monitor device that reads health values from the DPU. +config PDS_VDPA + tristate "vDPA driver for AMD/Pensando DSC devices" + select VIRTIO_PCI_LIB + depends on PCI_MSI + depends on PDS_CORE + help + vDPA network driver for AMD/Pensando's PDS Core devices. + With this driver, the VirtIO dataplane can be + offloaded to an AMD/Pensando DSC device. 
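The three ``cond_resched()`` hunks above (virtio_bt, virtio_console, virtio_crypto) apply the same fix: freeing every unused buffer on every virtqueue of a device can loop for a long time on large rings, so yielding once per queue keeps the drain from monopolizing the CPU. A minimal sketch of the pattern, assuming a hypothetical driver whose ``my_free_buf()`` stands in for the per-driver free routine::

	#include <linux/virtio.h>
	#include <linux/slab.h>
	#include <linux/sched.h>

	static void my_free_buf(void *buf)
	{
		kfree(buf);	/* per-driver cleanup goes here */
	}

	static void my_drain_unused_bufs(struct virtio_device *vdev)
	{
		struct virtqueue *vq;
		void *buf;

		virtio_device_for_each_vq(vdev, vq) {
			/* detach anything the device never consumed */
			while ((buf = virtqueue_detach_unused_buf(vq)))
				my_free_buf(buf);
			/* one reschedule point per queue, as in the hunks above */
			cond_resched();
		}
	}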
+ endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 59396ff2a318..8f53c6f3cca7 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_MLX5_VDPA) += mlx5/ obj-$(CONFIG_VP_VDPA) += virtio_pci/ obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/ obj-$(CONFIG_SNET_VDPA) += solidrun/ +obj-$(CONFIG_PDS_VDPA) += pds/ diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 5563b3a773c7..060f837a4f9f 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -69,6 +69,37 @@ static int ifcvf_read_config_range(struct pci_dev *dev, return 0; } +static u16 ifcvf_get_vq_size(struct ifcvf_hw *hw, u16 qid) +{ + u16 queue_size; + + vp_iowrite16(qid, &hw->common_cfg->queue_select); + queue_size = vp_ioread16(&hw->common_cfg->queue_size); + + return queue_size; +} + +/* This function returns the max allowed safe size for + * all virtqueues. It is the minimal size that can be + * supported by all virtqueues. + */ +u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw) +{ + u16 queue_size, max_size, qid; + + max_size = ifcvf_get_vq_size(hw, 0); + for (qid = 1; qid < hw->nr_vring; qid++) { + queue_size = ifcvf_get_vq_size(hw, qid); + /* 0 means the queue is unavailable */ + if (!queue_size) + continue; + + max_size = min(queue_size, max_size); + } + + return max_size; +} + int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) { struct virtio_pci_cap cap; @@ -134,6 +165,9 @@ next: } hw->nr_vring = vp_ioread16(&hw->common_cfg->num_queues); + hw->vring = kzalloc(sizeof(struct vring_info) * hw->nr_vring, GFP_KERNEL); + if (!hw->vring) + return -ENOMEM; for (i = 0; i < hw->nr_vring; i++) { vp_iowrite16(i, &hw->common_cfg->queue_select); @@ -170,21 +204,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) void ifcvf_reset(struct ifcvf_hw *hw) { - hw->config_cb.callback = NULL; - hw->config_cb.private = NULL; - ifcvf_set_status(hw, 0); - /* flush set_status, make sure VF is stopped, reset */ - ifcvf_get_status(hw); -} - -static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) -{ - if (status != 0) - status |= ifcvf_get_status(hw); - - ifcvf_set_status(hw, status); - ifcvf_get_status(hw); + while (ifcvf_get_status(hw)) + msleep(1); } u64 ifcvf_get_hw_features(struct ifcvf_hw *hw) @@ -204,11 +226,29 @@ u64 ifcvf_get_hw_features(struct ifcvf_hw *hw) return features; } -u64 ifcvf_get_features(struct ifcvf_hw *hw) +/* return provisioned vDPA dev features */ +u64 ifcvf_get_dev_features(struct ifcvf_hw *hw) { return hw->dev_features; } +u64 ifcvf_get_driver_features(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + u32 features_lo, features_hi; + u64 features; + + vp_iowrite32(0, &cfg->device_feature_select); + features_lo = vp_ioread32(&cfg->guest_feature); + + vp_iowrite32(1, &cfg->device_feature_select); + features_hi = vp_ioread32(&cfg->guest_feature); + + features = ((u64)features_hi << 32) | features_lo; + + return features; +} + int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features) { if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { @@ -275,7 +315,7 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, vp_iowrite8(*p++, hw->dev_cfg + offset + i); } -static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) +void ifcvf_set_driver_features(struct ifcvf_hw *hw, u64 features) { struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; @@ -286,105 +326,104 @@ static void ifcvf_set_features(struct ifcvf_hw *hw, u64
features) vp_iowrite32(features >> 32, &cfg->guest_feature); } -static int ifcvf_config_features(struct ifcvf_hw *hw) -{ - ifcvf_set_features(hw, hw->req_features); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK); - - if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) { - IFCVF_ERR(hw->pdev, "Failed to set FEATURES_OK status\n"); - return -EIO; - } - - return 0; -} - u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) { - struct ifcvf_lm_cfg __iomem *ifcvf_lm; - void __iomem *avail_idx_addr; + struct ifcvf_lm_cfg __iomem *lm_cfg = hw->lm_cfg; u16 last_avail_idx; - u32 q_pair_id; - ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / 2; - avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; - last_avail_idx = vp_ioread16(avail_idx_addr); + last_avail_idx = vp_ioread16(&lm_cfg->vq_state_region + qid * 2); return last_avail_idx; } int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) { - struct ifcvf_lm_cfg __iomem *ifcvf_lm; - void __iomem *avail_idx_addr; - u32 q_pair_id; + struct ifcvf_lm_cfg __iomem *lm_cfg = hw->lm_cfg; - ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / 2; - avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; - hw->vring[qid].last_avail_idx = num; - vp_iowrite16(num, avail_idx_addr); + vp_iowrite16(num, &lm_cfg->vq_state_region + qid * 2); return 0; } -static int ifcvf_hw_enable(struct ifcvf_hw *hw) +void ifcvf_set_vq_num(struct ifcvf_hw *hw, u16 qid, u32 num) { - struct virtio_pci_common_cfg __iomem *cfg; - u32 i; + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; - cfg = hw->common_cfg; - for (i = 0; i < hw->nr_vring; i++) { - if (!hw->vring[i].ready) - break; + vp_iowrite16(qid, &cfg->queue_select); + vp_iowrite16(num, &cfg->queue_size); +} - vp_iowrite16(i, &cfg->queue_select); - vp_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, - &cfg->queue_desc_hi); - vp_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, - &cfg->queue_avail_hi); - vp_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, - &cfg->queue_used_hi); - vp_iowrite16(hw->vring[i].size, &cfg->queue_size); - ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); - vp_iowrite16(1, &cfg->queue_enable); - } +int ifcvf_set_vq_address(struct ifcvf_hw *hw, u16 qid, u64 desc_area, + u64 driver_area, u64 device_area) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + + vp_iowrite16(qid, &cfg->queue_select); + vp_iowrite64_twopart(desc_area, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + vp_iowrite64_twopart(driver_area, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + vp_iowrite64_twopart(device_area, &cfg->queue_used_lo, + &cfg->queue_used_hi); return 0; } -static void ifcvf_hw_disable(struct ifcvf_hw *hw) +bool ifcvf_get_vq_ready(struct ifcvf_hw *hw, u16 qid) { - u32 i; + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + u16 queue_enable; - ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR); - for (i = 0; i < hw->nr_vring; i++) { - ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR); - } + vp_iowrite16(qid, &cfg->queue_select); + queue_enable = vp_ioread16(&cfg->queue_enable); + + return (bool)queue_enable; } -int ifcvf_start_hw(struct ifcvf_hw *hw) +void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready) { - ifcvf_reset(hw); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER); + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; - if (ifcvf_config_features(hw) < 0) - return -EINVAL; + 
vp_iowrite16(qid, &cfg->queue_select); + vp_iowrite16(ready, &cfg->queue_enable); +} - if (ifcvf_hw_enable(hw) < 0) - return -EINVAL; +static void ifcvf_reset_vring(struct ifcvf_hw *hw) +{ + u16 qid; - ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK); + for (qid = 0; qid < hw->nr_vring; qid++) { + hw->vring[qid].cb.callback = NULL; + hw->vring[qid].cb.private = NULL; + ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR); + } +} - return 0; +static void ifcvf_reset_config_handler(struct ifcvf_hw *hw) +{ + hw->config_cb.callback = NULL; + hw->config_cb.private = NULL; + ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR); +} + +static void ifcvf_synchronize_irq(struct ifcvf_hw *hw) +{ + u32 nvectors = hw->num_msix_vectors; + struct pci_dev *pdev = hw->pdev; + int i, irq; + + for (i = 0; i < nvectors; i++) { + irq = pci_irq_vector(pdev, i); + if (irq >= 0) + synchronize_irq(irq); + } } -void ifcvf_stop_hw(struct ifcvf_hw *hw) +void ifcvf_stop(struct ifcvf_hw *hw) { - ifcvf_hw_disable(hw); - ifcvf_reset(hw); + ifcvf_synchronize_irq(hw); + ifcvf_reset_vring(hw); + ifcvf_reset_config_handler(hw); } void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index c20d1c40214e..b57849c643f6 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -24,15 +24,9 @@ #define N3000_DEVICE_ID 0x1041 #define N3000_SUBSYS_DEVICE_ID 0x001A -/* Max 8 data queue pairs(16 queues) and one control vq for now. */ -#define IFCVF_MAX_QUEUES 17 - #define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE -#define IFCVF_QUEUE_MAX 32768 #define IFCVF_PCI_MAX_RESOURCE 6 -#define IFCVF_LM_CFG_SIZE 0x40 -#define IFCVF_LM_RING_STATE_OFFSET 0x20 #define IFCVF_LM_BAR 4 #define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) @@ -47,12 +41,7 @@ #define MSIX_VECTOR_DEV_SHARED 3 struct vring_info { - u64 desc; - u64 avail; - u64 used; - u16 size; u16 last_avail_idx; - bool ready; void __iomem *notify_addr; phys_addr_t notify_pa; u32 irq; @@ -60,10 +49,18 @@ struct vring_info { char msix_name[256]; }; +struct ifcvf_lm_cfg { + __le64 control; + __le64 status; + __le64 lm_mem_log_start_addr; + __le64 lm_mem_log_end_addr; + __le16 vq_state_region; +}; + struct ifcvf_hw { u8 __iomem *isr; /* Live migration */ - u8 __iomem *lm_cfg; + struct ifcvf_lm_cfg __iomem *lm_cfg; /* Notification bar number */ u8 notify_bar; u8 msix_vector_status; @@ -74,13 +71,12 @@ struct ifcvf_hw { phys_addr_t notify_base_pa; u32 notify_off_multiplier; u32 dev_type; - u64 req_features; u64 hw_features; /* provisioned device features */ u64 dev_features; struct virtio_pci_common_cfg __iomem *common_cfg; void __iomem *dev_cfg; - struct vring_info vring[IFCVF_MAX_QUEUES]; + struct vring_info *vring; void __iomem * const *base; char config_msix_name[256]; struct vdpa_callback config_cb; @@ -88,6 +84,7 @@ struct ifcvf_hw { int vqs_reused_irq; u16 nr_vring; /* VIRTIO_PCI_CAP_DEVICE_CFG size */ + u32 num_msix_vectors; u32 cap_dev_config_size; struct pci_dev *pdev; }; @@ -98,16 +95,6 @@ struct ifcvf_adapter { struct ifcvf_hw *vf; }; -struct ifcvf_vring_lm_cfg { - u32 idx_addr[2]; - u8 reserved[IFCVF_LM_CFG_SIZE - 8]; -}; - -struct ifcvf_lm_cfg { - u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; - struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES]; -}; - struct ifcvf_vdpa_mgmt_dev { struct vdpa_mgmt_dev mdev; struct ifcvf_hw vf; @@ -116,8 +103,7 @@ struct ifcvf_vdpa_mgmt_dev { }; int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); -int ifcvf_start_hw(struct 
ifcvf_hw *hw); -void ifcvf_stop_hw(struct ifcvf_hw *hw); +void ifcvf_stop(struct ifcvf_hw *hw); void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset, void *dst, int length); @@ -127,7 +113,7 @@ u8 ifcvf_get_status(struct ifcvf_hw *hw); void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); -u64 ifcvf_get_features(struct ifcvf_hw *hw); +u64 ifcvf_get_dev_features(struct ifcvf_hw *hw); u64 ifcvf_get_hw_features(struct ifcvf_hw *hw); int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features); u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); @@ -137,4 +123,12 @@ int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); u32 ifcvf_get_config_size(struct ifcvf_hw *hw); u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector); u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector); +void ifcvf_set_vq_num(struct ifcvf_hw *hw, u16 qid, u32 num); +int ifcvf_set_vq_address(struct ifcvf_hw *hw, u16 qid, u64 desc_area, + u64 driver_area, u64 device_area); +bool ifcvf_get_vq_ready(struct ifcvf_hw *hw, u16 qid); +void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready); +void ifcvf_set_driver_features(struct ifcvf_hw *hw, u64 features); +u64 ifcvf_get_driver_features(struct ifcvf_hw *hw); +u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 7f78c47e40d6..e98fa8100f3c 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf) ifcvf_free_vq_irq(vf); ifcvf_free_config_irq(vf); ifcvf_free_irq_vectors(pdev); + vf->num_msix_vectors = 0; } /* ifcvf MSIX vectors allocator, this helper tries to allocate @@ -343,56 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf) if (ret) return ret; - return 0; -} - -static int ifcvf_start_datapath(struct ifcvf_adapter *adapter) -{ - struct ifcvf_hw *vf = adapter->vf; - u8 status; - int ret; - - ret = ifcvf_start_hw(vf); - if (ret < 0) { - status = ifcvf_get_status(vf); - status |= VIRTIO_CONFIG_S_FAILED; - ifcvf_set_status(vf, status); - } - - return ret; -} - -static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter) -{ - struct ifcvf_hw *vf = adapter->vf; - int i; - - for (i = 0; i < vf->nr_vring; i++) - vf->vring[i].cb.callback = NULL; - - ifcvf_stop_hw(vf); + vf->num_msix_vectors = nvectors; return 0; } -static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) -{ - struct ifcvf_hw *vf = adapter->vf; - int i; - - for (i = 0; i < vf->nr_vring; i++) { - vf->vring[i].last_avail_idx = 0; - vf->vring[i].desc = 0; - vf->vring[i].avail = 0; - vf->vring[i].used = 0; - vf->vring[i].ready = 0; - vf->vring[i].cb.callback = NULL; - vf->vring[i].cb.private = NULL; - } - - ifcvf_reset(vf); -} - static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev) { return container_of(vdpa_dev, struct ifcvf_adapter, vdpa); @@ -414,7 +370,7 @@ static u64 ifcvf_vdpa_get_device_features(struct vdpa_device *vdpa_dev) u64 features; if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK) - features = ifcvf_get_features(vf); + features = ifcvf_get_dev_features(vf); else { features = 0; IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type); @@ -432,7 +388,7 @@ static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 feat if (ret) return ret; - vf->req_features = features; + 
ifcvf_set_driver_features(vf, features); return 0; } @@ -440,8 +396,11 @@ static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 feat static u64 ifcvf_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + u64 features; + + features = ifcvf_get_driver_features(vf); - return vf->req_features; + return features; } static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) @@ -453,13 +412,11 @@ static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) { - struct ifcvf_adapter *adapter; struct ifcvf_hw *vf; u8 status_old; int ret; vf = vdpa_to_vf(vdpa_dev); - adapter = vdpa_to_adapter(vdpa_dev); status_old = ifcvf_get_status(vf); if (status_old == status) @@ -469,16 +426,9 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { ret = ifcvf_request_irq(vf); if (ret) { - status = ifcvf_get_status(vf); - status |= VIRTIO_CONFIG_S_FAILED; - ifcvf_set_status(vf, status); + IFCVF_ERR(vf->pdev, "failed to request irq with error %d\n", ret); return; } - - if (ifcvf_start_datapath(adapter) < 0) - IFCVF_ERR(adapter->pdev, - "Failed to set ifcvf vdpa status %u\n", - status); } ifcvf_set_status(vf, status); @@ -486,30 +436,24 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev) { - struct ifcvf_adapter *adapter; - struct ifcvf_hw *vf; - u8 status_old; - - vf = vdpa_to_vf(vdpa_dev); - adapter = vdpa_to_adapter(vdpa_dev); - status_old = ifcvf_get_status(vf); + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + u8 status = ifcvf_get_status(vf); - if (status_old == 0) - return 0; + ifcvf_stop(vf); - if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) { - ifcvf_stop_datapath(adapter); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) ifcvf_free_irq(vf); - } - ifcvf_reset_vring(adapter); + ifcvf_reset(vf); return 0; } static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) { - return IFCVF_QUEUE_MAX; + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_get_max_vq_size(vf); } static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, @@ -542,14 +486,14 @@ static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - vf->vring[qid].ready = ready; + ifcvf_set_vq_ready(vf, qid, ready); } static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return vf->vring[qid].ready; + return ifcvf_get_vq_ready(vf, qid); } static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, @@ -557,7 +501,7 @@ static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - vf->vring[qid].size = num; + ifcvf_set_vq_num(vf, qid, num); } static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, @@ -566,11 +510,7 @@ static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - vf->vring[qid].desc = desc_area; - vf->vring[qid].avail = driver_area; - vf->vring[qid].used = device_area; - - return 0; + return ifcvf_set_vq_address(vf, qid, desc_area, driver_area, device_area); } static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) @@ -892,6 +832,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err: + 
kfree(ifcvf_mgmt_dev->vf.vring); kfree(ifcvf_mgmt_dev); return ret; } @@ -902,6 +843,7 @@ static void ifcvf_remove(struct pci_dev *pdev) ifcvf_mgmt_dev = pci_get_drvdata(pdev); vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev); + kfree(ifcvf_mgmt_dev->vf.vring); kfree(ifcvf_mgmt_dev); } @@ -911,7 +853,9 @@ static struct pci_device_id ifcvf_pci_ids[] = { N3000_DEVICE_ID, PCI_VENDOR_ID_INTEL, N3000_SUBSYS_DEVICE_ID) }, - /* C5000X-PL network device */ + /* C5000X-PL network device + * F2000X-PL network device + */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, VIRTIO_TRANS_ID_NET, PCI_VENDOR_ID_INTEL, diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 279ac6a558d2..9138ef2fb2c8 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info { u64 driver_addr; u16 avail_index; u16 used_index; + struct msi_map map; bool ready; bool restore; }; @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue { u16 avail_idx; u16 used_idx; int fw_state; + struct msi_map map; /* keep last in the struct */ struct mlx5_vq_restore_info ri; @@ -808,6 +810,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); } +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) +{ + return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && + pci_msix_can_alloc_dyn(mvdev->mdev->pdev); +} + static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); @@ -849,9 +858,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque if (vq_is_tx(mvq->index)) MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + if (mvq->map.virq) { + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); + } else { + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); + } + MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); @@ -1194,6 +1209,56 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); } +static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) +{ + struct vdpa_callback *cb = priv; + + if (cb->callback) + return cb->callback(cb->private); + + return IRQ_HANDLED; +} + +static void alloc_vector(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; + struct mlx5_vdpa_irq_pool_entry *ent; + int err; + int i; + + for (i = 0; i < irqp->num_ent; i++) { + ent = &irqp->entries[i]; + if (!ent->used) { + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", + dev_name(&ndev->mvdev.vdev.dev), mvq->index); + ent->dev_id = &ndev->event_cbs[mvq->index]; + err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, + ent->name, ent->dev_id); + if (err) + return; + + ent->used = true; + mvq->map = ent->map; + return; + } + } +} + +static void dealloc_vector(struct 
mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; + int i; + + for (i = 0; i < irqp->num_ent; i++) + if (mvq->map.virq == irqp->entries[i].map.virq) { + free_irq(mvq->map.virq, irqp->entries[i].dev_id); + irqp->entries[i].used = false; + return; + } +} + static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { u16 idx = mvq->index; @@ -1223,27 +1288,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) err = counter_set_alloc(ndev, mvq); if (err) - goto err_counter; + goto err_connect; + alloc_vector(ndev, mvq); err = create_virtqueue(ndev, mvq); if (err) - goto err_connect; + goto err_vq; if (mvq->ready) { err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); if (err) { mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", idx, err); - goto err_connect; + goto err_modify; } } mvq->initialized = true; return 0; -err_connect: +err_modify: + destroy_virtqueue(ndev, mvq); +err_vq: + dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); -err_counter: +err_connect: qp_destroy(ndev, &mvq->vqqp); err_vqqp: qp_destroy(ndev, &mvq->fwqp); @@ -1288,6 +1357,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * suspend_vq(ndev, mvq); destroy_virtqueue(ndev, mvq); + dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); qp_destroy(ndev, &mvq->vqqp); qp_destroy(ndev, &mvq->fwqp); @@ -2505,6 +2575,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu ri->desc_addr = mvq->desc_addr; ri->device_addr = mvq->device_addr; ri->driver_addr = mvq->driver_addr; + ri->map = mvq->map; ri->restore = true; return 0; } @@ -2549,6 +2620,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) mvq->desc_addr = ri->desc_addr; mvq->device_addr = ri->device_addr; mvq->driver_addr = ri->driver_addr; + mvq->map = ri->map; } } @@ -2833,6 +2905,25 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) return mvdev->vdev.dma_dev; } +static void free_irqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_irq_pool_entry *ent; + int i; + + if (!msix_mode_supported(&ndev->mvdev)) + return; + + if (!ndev->irqp.entries) + return; + + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { + ent = ndev->irqp.entries + i; + if (ent->map.virq) + pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); + } + kfree(ndev->irqp.entries); +} + static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2848,6 +2939,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); } mlx5_vdpa_free_resources(&ndev->mvdev); + free_irqs(ndev); kfree(ndev->event_cbs); kfree(ndev->vqs); } @@ -2876,9 +2968,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device return ret; } -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) { - return -EOPNOTSUPP; + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + + if (!is_index_valid(mvdev, idx)) + return -EINVAL; + + if (is_ctrl_vq_idx(mvdev, idx)) + return -EOPNOTSUPP; + + mvq = &ndev->vqs[idx]; + if (!mvq->map.virq) + return -EOPNOTSUPP; + + return mvq->map.virq; } static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) @@ -3155,6 +3261,34 @@ static int config_func_mtu(struct 
mlx5_core_dev *mdev, u16 mtu) return err; } +static void allocate_irqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_irq_pool_entry *ent; + int i; + + if (!msix_mode_supported(&ndev->mvdev)) + return; + + if (!ndev->mvdev.mdev->pdev) + return; + + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); + if (!ndev->irqp.entries) + return; + + + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + ent = ndev->irqp.entries + i; + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", + dev_name(&ndev->mvdev.vdev.dev), i); + ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); + if (!ent->map.virq) + return; + + ndev->irqp.num_ent++; + } +} + static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, const struct vdpa_dev_set_config *add_config) { @@ -3233,6 +3367,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } init_mvqs(ndev); + allocate_irqs(ndev); init_rwsem(&ndev->reslock); config = &ndev->config; @@ -3413,6 +3548,17 @@ static void mlx5v_remove(struct auxiliary_device *adev) kfree(mgtdev); } +static void mlx5v_shutdown(struct auxiliary_device *auxdev) +{ + struct mlx5_vdpa_mgmtdev *mgtdev; + struct mlx5_vdpa_net *ndev; + + mgtdev = auxiliary_get_drvdata(auxdev); + ndev = mgtdev->ndev; + + free_irqs(ndev); +} + static const struct auxiliary_device_id mlx5v_id_table[] = { { .name = MLX5_ADEV_NAME ".vnet", }, {}, @@ -3424,6 +3570,7 @@ static struct auxiliary_driver mlx5v_driver = { .name = "vnet", .probe = mlx5v_probe, .remove = mlx5v_remove, + .shutdown = mlx5v_shutdown, .id_table = mlx5v_id_table, }; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h index c90a89e1de4d..36c44d9fdd16 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -26,6 +26,20 @@ static inline u16 key2vid(u64 key) return (u16)(key >> 48) & 0xfff; } +#define MLX5_VDPA_IRQ_NAME_LEN 32 + +struct mlx5_vdpa_irq_pool_entry { + struct msi_map map; + bool used; + char name[MLX5_VDPA_IRQ_NAME_LEN]; + void *dev_id; +}; + +struct mlx5_vdpa_irq_pool { + int num_ent; + struct mlx5_vdpa_irq_pool_entry *entries; +}; + struct mlx5_vdpa_net { struct mlx5_vdpa_dev mvdev; struct mlx5_vdpa_net_resources res; @@ -49,6 +63,7 @@ struct mlx5_vdpa_net { struct vdpa_callback config_cb; struct mlx5_vdpa_wq_ent cvq_ent; struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; + struct mlx5_vdpa_irq_pool irqp; struct dentry *debugfs; }; diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile new file mode 100644 index 000000000000..2e22418e3ab3 --- /dev/null +++ b/drivers/vdpa/pds/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright(c) 2023 Advanced Micro Devices, Inc + +obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o + +pds_vdpa-y := aux_drv.o \ + cmds.o \ + vdpa_dev.o + +pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/vdpa/pds/aux_drv.c b/drivers/vdpa/pds/aux_drv.c new file mode 100644 index 000000000000..186e9ee22eb1 --- /dev/null +++ b/drivers/vdpa/pds/aux_drv.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#include <linux/auxiliary_bus.h> +#include <linux/pci.h> +#include <linux/vdpa.h> +#include <linux/virtio_pci_modern.h> + +#include <linux/pds/pds_common.h> +#include <linux/pds/pds_core_if.h> +#include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_auxbus.h> + +#include "aux_drv.h" +#include "debugfs.h" +#include "vdpa_dev.h" + +static const struct 
auxiliary_device_id pds_vdpa_id_table[] = { + { .name = PDS_VDPA_DEV_NAME, }, + {}, +}; + +static int pds_vdpa_device_id_check(struct pci_dev *pdev) +{ + if (pdev->device != PCI_DEVICE_ID_PENSANDO_VDPA_VF || + pdev->vendor != PCI_VENDOR_ID_PENSANDO) + return -ENODEV; + + return PCI_DEVICE_ID_PENSANDO_VDPA_VF; +} + +static int pds_vdpa_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) + +{ + struct pds_auxiliary_dev *padev = + container_of(aux_dev, struct pds_auxiliary_dev, aux_dev); + struct device *dev = &aux_dev->dev; + struct pds_vdpa_aux *vdpa_aux; + int err; + + vdpa_aux = kzalloc(sizeof(*vdpa_aux), GFP_KERNEL); + if (!vdpa_aux) + return -ENOMEM; + + vdpa_aux->padev = padev; + vdpa_aux->vf_id = pci_iov_vf_id(padev->vf_pdev); + auxiliary_set_drvdata(aux_dev, vdpa_aux); + + /* Get device ident info and set up the vdpa_mgmt_dev */ + err = pds_vdpa_get_mgmt_info(vdpa_aux); + if (err) + goto err_free_mem; + + /* Find the virtio configuration */ + vdpa_aux->vd_mdev.pci_dev = padev->vf_pdev; + vdpa_aux->vd_mdev.device_id_check = pds_vdpa_device_id_check; + vdpa_aux->vd_mdev.dma_mask = DMA_BIT_MASK(PDS_CORE_ADDR_LEN); + err = vp_modern_probe(&vdpa_aux->vd_mdev); + if (err) { + dev_err(dev, "Unable to probe for virtio configuration: %pe\n", + ERR_PTR(err)); + goto err_free_mgmt_info; + } + + /* Let vdpa know that we can provide devices */ + err = vdpa_mgmtdev_register(&vdpa_aux->vdpa_mdev); + if (err) { + dev_err(dev, "%s: Failed to initialize vdpa_mgmt interface: %pe\n", + __func__, ERR_PTR(err)); + goto err_free_virtio; + } + + pds_vdpa_debugfs_add_pcidev(vdpa_aux); + pds_vdpa_debugfs_add_ident(vdpa_aux); + + return 0; + +err_free_virtio: + vp_modern_remove(&vdpa_aux->vd_mdev); +err_free_mgmt_info: + pci_free_irq_vectors(padev->vf_pdev); +err_free_mem: + kfree(vdpa_aux); + auxiliary_set_drvdata(aux_dev, NULL); + + return err; +} + +static void pds_vdpa_remove(struct auxiliary_device *aux_dev) +{ + struct pds_vdpa_aux *vdpa_aux = auxiliary_get_drvdata(aux_dev); + struct device *dev = &aux_dev->dev; + + vdpa_mgmtdev_unregister(&vdpa_aux->vdpa_mdev); + vp_modern_remove(&vdpa_aux->vd_mdev); + pci_free_irq_vectors(vdpa_aux->padev->vf_pdev); + + pds_vdpa_debugfs_del_vdpadev(vdpa_aux); + kfree(vdpa_aux); + auxiliary_set_drvdata(aux_dev, NULL); + + dev_info(dev, "Removed\n"); +} + +static struct auxiliary_driver pds_vdpa_driver = { + .name = PDS_DEV_TYPE_VDPA_STR, + .probe = pds_vdpa_probe, + .remove = pds_vdpa_remove, + .id_table = pds_vdpa_id_table, +}; + +static void __exit pds_vdpa_cleanup(void) +{ + auxiliary_driver_unregister(&pds_vdpa_driver); + + pds_vdpa_debugfs_destroy(); +} +module_exit(pds_vdpa_cleanup); + +static int __init pds_vdpa_init(void) +{ + int err; + + pds_vdpa_debugfs_create(); + + err = auxiliary_driver_register(&pds_vdpa_driver); + if (err) { + pr_err("%s: aux driver register failed: %pe\n", + PDS_VDPA_DRV_NAME, ERR_PTR(err)); + pds_vdpa_debugfs_destroy(); + } + + return err; +} +module_init(pds_vdpa_init); + +MODULE_DESCRIPTION(PDS_VDPA_DRV_DESCRIPTION); +MODULE_AUTHOR("Advanced Micro Devices, Inc"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vdpa/pds/aux_drv.h b/drivers/vdpa/pds/aux_drv.h new file mode 100644 index 000000000000..26b75344156e --- /dev/null +++ b/drivers/vdpa/pds/aux_drv.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _AUX_DRV_H_ +#define _AUX_DRV_H_ + +#include <linux/virtio_pci_modern.h> + +#define PDS_VDPA_DRV_DESCRIPTION "AMD/Pensando vDPA VF 
Device Driver" +#define PDS_VDPA_DRV_NAME KBUILD_MODNAME + +struct pds_vdpa_aux { + struct pds_auxiliary_dev *padev; + + struct vdpa_mgmt_dev vdpa_mdev; + struct pds_vdpa_device *pdsv; + + struct pds_vdpa_ident ident; + + int vf_id; + struct dentry *dentry; + struct virtio_pci_modern_device vd_mdev; + + int nintrs; +}; +#endif /* _AUX_DRV_H_ */ diff --git a/drivers/vdpa/pds/cmds.c b/drivers/vdpa/pds/cmds.c new file mode 100644 index 000000000000..80863a41c3cd --- /dev/null +++ b/drivers/vdpa/pds/cmds.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#include <linux/vdpa.h> +#include <linux/virtio_pci_modern.h> + +#include <linux/pds/pds_common.h> +#include <linux/pds/pds_core_if.h> +#include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_auxbus.h> + +#include "vdpa_dev.h" +#include "aux_drv.h" +#include "cmds.h" + +int pds_vdpa_init_hw(struct pds_vdpa_device *pdsv) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_init.opcode = PDS_VDPA_CMD_INIT, + .vdpa_init.vdpa_index = pdsv->vdpa_index, + .vdpa_init.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + }; + union pds_core_adminq_comp comp = {}; + int err; + + /* Initialize the vdpa/virtio device */ + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_init), + &comp, 0); + if (err) + dev_dbg(dev, "Failed to init hw, status %d: %pe\n", + comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_reset(struct pds_vdpa_device *pdsv) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa.opcode = PDS_VDPA_CMD_RESET, + .vdpa.vdpa_index = pdsv->vdpa_index, + .vdpa.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + }; + union pds_core_adminq_comp comp = {}; + int err; + + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa), &comp, 0); + if (err) + dev_dbg(dev, "Failed to reset hw, status %d: %pe\n", + comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_set_status(struct pds_vdpa_device *pdsv, u8 status) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_status.opcode = PDS_VDPA_CMD_STATUS_UPDATE, + .vdpa_status.vdpa_index = pdsv->vdpa_index, + .vdpa_status.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + .vdpa_status.status = status, + }; + union pds_core_adminq_comp comp = {}; + int err; + + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_status), &comp, 0); + if (err) + dev_dbg(dev, "Failed to set status to %#x, error status %d: %pe\n", + status, comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_set_mac(struct pds_vdpa_device *pdsv, u8 *mac) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_setattr.opcode = PDS_VDPA_CMD_SET_ATTR, + .vdpa_setattr.vdpa_index = pdsv->vdpa_index, + .vdpa_setattr.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + .vdpa_setattr.attr = PDS_VDPA_ATTR_MAC, + }; + union pds_core_adminq_comp comp = {}; + int err; + + ether_addr_copy(cmd.vdpa_setattr.mac, mac); + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_setattr), + &comp, 0); + if (err) + dev_dbg(dev, "Failed to set mac address %pM, status %d: %pe\n", + mac, comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_set_max_vq_pairs(struct 
pds_vdpa_device *pdsv, u16 max_vqp) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_setattr.opcode = PDS_VDPA_CMD_SET_ATTR, + .vdpa_setattr.vdpa_index = pdsv->vdpa_index, + .vdpa_setattr.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + .vdpa_setattr.attr = PDS_VDPA_ATTR_MAX_VQ_PAIRS, + .vdpa_setattr.max_vq_pairs = cpu_to_le16(max_vqp), + }; + union pds_core_adminq_comp comp = {}; + int err; + + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_setattr), + &comp, 0); + if (err) + dev_dbg(dev, "Failed to set max vq pairs %u, status %d: %pe\n", + max_vqp, comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, + struct pds_vdpa_vq_info *vq_info) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_vq_init.opcode = PDS_VDPA_CMD_VQ_INIT, + .vdpa_vq_init.vdpa_index = pdsv->vdpa_index, + .vdpa_vq_init.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + .vdpa_vq_init.qid = cpu_to_le16(qid), + .vdpa_vq_init.len = cpu_to_le16(ilog2(vq_info->q_len)), + .vdpa_vq_init.desc_addr = cpu_to_le64(vq_info->desc_addr), + .vdpa_vq_init.avail_addr = cpu_to_le64(vq_info->avail_addr), + .vdpa_vq_init.used_addr = cpu_to_le64(vq_info->used_addr), + .vdpa_vq_init.intr_index = cpu_to_le16(qid), + .vdpa_vq_init.avail_index = cpu_to_le16(vq_info->avail_idx ^ invert_idx), + .vdpa_vq_init.used_index = cpu_to_le16(vq_info->used_idx ^ invert_idx), + }; + union pds_core_adminq_comp comp = {}; + int err; + + dev_dbg(dev, "%s: qid %d len %d desc_addr %#llx avail_addr %#llx used_addr %#llx\n", + __func__, qid, ilog2(vq_info->q_len), + vq_info->desc_addr, vq_info->avail_addr, vq_info->used_addr); + + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_vq_init), + &comp, 0); + if (err) + dev_dbg(dev, "Failed to init vq %d, status %d: %pe\n", + qid, comp.status, ERR_PTR(err)); + + return err; +} + +int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, + struct pds_vdpa_vq_info *vq_info) +{ + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + union pds_core_adminq_cmd cmd = { + .vdpa_vq_reset.opcode = PDS_VDPA_CMD_VQ_RESET, + .vdpa_vq_reset.vdpa_index = pdsv->vdpa_index, + .vdpa_vq_reset.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id), + .vdpa_vq_reset.qid = cpu_to_le16(qid), + }; + union pds_core_adminq_comp comp = {}; + int err; + + err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_vq_reset), + &comp, 0); + if (err) { + dev_dbg(dev, "Failed to reset vq %d, status %d: %pe\n", + qid, comp.status, ERR_PTR(err)); + return err; + } + + vq_info->avail_idx = le16_to_cpu(comp.vdpa_vq_reset.avail_index) ^ invert_idx; + vq_info->used_idx = le16_to_cpu(comp.vdpa_vq_reset.used_index) ^ invert_idx; + + return 0; +} diff --git a/drivers/vdpa/pds/cmds.h b/drivers/vdpa/pds/cmds.h new file mode 100644 index 000000000000..e24d85cb8f1c --- /dev/null +++ b/drivers/vdpa/pds/cmds.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _VDPA_CMDS_H_ +#define _VDPA_CMDS_H_ + +int pds_vdpa_init_hw(struct pds_vdpa_device *pdsv); + +int pds_vdpa_cmd_reset(struct pds_vdpa_device *pdsv); +int pds_vdpa_cmd_set_status(struct pds_vdpa_device *pdsv, u8 status); +int pds_vdpa_cmd_set_mac(struct pds_vdpa_device *pdsv, u8 *mac); +int 
pds_vdpa_cmd_set_max_vq_pairs(struct pds_vdpa_device *pdsv, u16 max_vqp); +int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, + struct pds_vdpa_vq_info *vq_info); +int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, + struct pds_vdpa_vq_info *vq_info); +int pds_vdpa_cmd_set_features(struct pds_vdpa_device *pdsv, u64 features); +#endif /* _VDPA_CMDS_H_ */ diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c new file mode 100644 index 000000000000..21a0dc0cb607 --- /dev/null +++ b/drivers/vdpa/pds/debugfs.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#include <linux/pci.h> +#include <linux/vdpa.h> + +#include <linux/pds/pds_common.h> +#include <linux/pds/pds_core_if.h> +#include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_auxbus.h> + +#include "aux_drv.h" +#include "vdpa_dev.h" +#include "debugfs.h" + +static struct dentry *dbfs_dir; + +void pds_vdpa_debugfs_create(void) +{ + dbfs_dir = debugfs_create_dir(PDS_VDPA_DRV_NAME, NULL); +} + +void pds_vdpa_debugfs_destroy(void) +{ + debugfs_remove_recursive(dbfs_dir); + dbfs_dir = NULL; +} + +#define PRINT_SBIT_NAME(__seq, __f, __name) \ + do { \ + if ((__f) & (__name)) \ + seq_printf(__seq, " %s", &#__name[16]); \ + } while (0) + +static void print_status_bits(struct seq_file *seq, u8 status) +{ + seq_puts(seq, "status:"); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_ACKNOWLEDGE); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER_OK); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FEATURES_OK); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_NEEDS_RESET); + PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FAILED); + seq_puts(seq, "\n"); +} + +static void print_feature_bits_all(struct seq_file *seq, u64 features) +{ + int i; + + seq_puts(seq, "features:"); + + for (i = 0; i < (sizeof(u64) * 8); i++) { + u64 mask = BIT_ULL(i); + + switch (features & mask) { + case BIT_ULL(VIRTIO_NET_F_CSUM): + seq_puts(seq, " VIRTIO_NET_F_CSUM"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_CSUM): + seq_puts(seq, " VIRTIO_NET_F_GUEST_CSUM"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS): + seq_puts(seq, " VIRTIO_NET_F_CTRL_GUEST_OFFLOADS"); + break; + case BIT_ULL(VIRTIO_NET_F_MTU): + seq_puts(seq, " VIRTIO_NET_F_MTU"); + break; + case BIT_ULL(VIRTIO_NET_F_MAC): + seq_puts(seq, " VIRTIO_NET_F_MAC"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_TSO4): + seq_puts(seq, " VIRTIO_NET_F_GUEST_TSO4"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_TSO6): + seq_puts(seq, " VIRTIO_NET_F_GUEST_TSO6"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_ECN): + seq_puts(seq, " VIRTIO_NET_F_GUEST_ECN"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_UFO): + seq_puts(seq, " VIRTIO_NET_F_GUEST_UFO"); + break; + case BIT_ULL(VIRTIO_NET_F_HOST_TSO4): + seq_puts(seq, " VIRTIO_NET_F_HOST_TSO4"); + break; + case BIT_ULL(VIRTIO_NET_F_HOST_TSO6): + seq_puts(seq, " VIRTIO_NET_F_HOST_TSO6"); + break; + case BIT_ULL(VIRTIO_NET_F_HOST_ECN): + seq_puts(seq, " VIRTIO_NET_F_HOST_ECN"); + break; + case BIT_ULL(VIRTIO_NET_F_HOST_UFO): + seq_puts(seq, " VIRTIO_NET_F_HOST_UFO"); + break; + case BIT_ULL(VIRTIO_NET_F_MRG_RXBUF): + seq_puts(seq, " VIRTIO_NET_F_MRG_RXBUF"); + break; + case BIT_ULL(VIRTIO_NET_F_STATUS): + seq_puts(seq, " VIRTIO_NET_F_STATUS"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_VQ): + seq_puts(seq, " VIRTIO_NET_F_CTRL_VQ"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_RX): + 
seq_puts(seq, " VIRTIO_NET_F_CTRL_RX"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_VLAN): + seq_puts(seq, " VIRTIO_NET_F_CTRL_VLAN"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA): + seq_puts(seq, " VIRTIO_NET_F_CTRL_RX_EXTRA"); + break; + case BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE): + seq_puts(seq, " VIRTIO_NET_F_GUEST_ANNOUNCE"); + break; + case BIT_ULL(VIRTIO_NET_F_MQ): + seq_puts(seq, " VIRTIO_NET_F_MQ"); + break; + case BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR): + seq_puts(seq, " VIRTIO_NET_F_CTRL_MAC_ADDR"); + break; + case BIT_ULL(VIRTIO_NET_F_HASH_REPORT): + seq_puts(seq, " VIRTIO_NET_F_HASH_REPORT"); + break; + case BIT_ULL(VIRTIO_NET_F_RSS): + seq_puts(seq, " VIRTIO_NET_F_RSS"); + break; + case BIT_ULL(VIRTIO_NET_F_RSC_EXT): + seq_puts(seq, " VIRTIO_NET_F_RSC_EXT"); + break; + case BIT_ULL(VIRTIO_NET_F_STANDBY): + seq_puts(seq, " VIRTIO_NET_F_STANDBY"); + break; + case BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX): + seq_puts(seq, " VIRTIO_NET_F_SPEED_DUPLEX"); + break; + case BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY): + seq_puts(seq, " VIRTIO_F_NOTIFY_ON_EMPTY"); + break; + case BIT_ULL(VIRTIO_F_ANY_LAYOUT): + seq_puts(seq, " VIRTIO_F_ANY_LAYOUT"); + break; + case BIT_ULL(VIRTIO_F_VERSION_1): + seq_puts(seq, " VIRTIO_F_VERSION_1"); + break; + case BIT_ULL(VIRTIO_F_ACCESS_PLATFORM): + seq_puts(seq, " VIRTIO_F_ACCESS_PLATFORM"); + break; + case BIT_ULL(VIRTIO_F_RING_PACKED): + seq_puts(seq, " VIRTIO_F_RING_PACKED"); + break; + case BIT_ULL(VIRTIO_F_ORDER_PLATFORM): + seq_puts(seq, " VIRTIO_F_ORDER_PLATFORM"); + break; + case BIT_ULL(VIRTIO_F_SR_IOV): + seq_puts(seq, " VIRTIO_F_SR_IOV"); + break; + case 0: + break; + default: + seq_printf(seq, " bit_%d", i); + break; + } + } + + seq_puts(seq, "\n"); +} + +void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_aux *vdpa_aux) +{ + vdpa_aux->dentry = debugfs_create_dir(pci_name(vdpa_aux->padev->vf_pdev), dbfs_dir); +} + +static int identity_show(struct seq_file *seq, void *v) +{ + struct pds_vdpa_aux *vdpa_aux = seq->private; + struct vdpa_mgmt_dev *mgmt; + + seq_printf(seq, "aux_dev: %s\n", + dev_name(&vdpa_aux->padev->aux_dev.dev)); + + mgmt = &vdpa_aux->vdpa_mdev; + seq_printf(seq, "max_vqs: %d\n", mgmt->max_supported_vqs); + seq_printf(seq, "config_attr_mask: %#llx\n", mgmt->config_attr_mask); + seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features); + print_feature_bits_all(seq, mgmt->supported_features); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(identity); + +void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux) +{ + debugfs_create_file("identity", 0400, vdpa_aux->dentry, + vdpa_aux, &identity_fops); +} + +static int config_show(struct seq_file *seq, void *v) +{ + struct pds_vdpa_device *pdsv = seq->private; + struct virtio_net_config vc; + u64 driver_features; + u8 status; + + memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device, + sizeof(struct virtio_net_config)); + + seq_printf(seq, "mac: %pM\n", vc.mac); + seq_printf(seq, "max_virtqueue_pairs: %d\n", + __virtio16_to_cpu(true, vc.max_virtqueue_pairs)); + seq_printf(seq, "mtu: %d\n", __virtio16_to_cpu(true, vc.mtu)); + seq_printf(seq, "speed: %d\n", le32_to_cpu(vc.speed)); + seq_printf(seq, "duplex: %d\n", vc.duplex); + seq_printf(seq, "rss_max_key_size: %d\n", vc.rss_max_key_size); + seq_printf(seq, "rss_max_indirection_table_length: %d\n", + le16_to_cpu(vc.rss_max_indirection_table_length)); + seq_printf(seq, "supported_hash_types: %#x\n", + le32_to_cpu(vc.supported_hash_types)); + seq_printf(seq, "vn_status: %#x\n", + __virtio16_to_cpu(true, vc.status)); + + status = 
vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); + seq_printf(seq, "dev_status: %#x\n", status); + print_status_bits(seq, status); + + seq_printf(seq, "req_features: %#llx\n", pdsv->req_features); + print_feature_bits_all(seq, pdsv->req_features); + driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); + seq_printf(seq, "driver_features: %#llx\n", driver_features); + print_feature_bits_all(seq, driver_features); + seq_printf(seq, "vdpa_index: %d\n", pdsv->vdpa_index); + seq_printf(seq, "num_vqs: %d\n", pdsv->num_vqs); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(config); + +static int vq_show(struct seq_file *seq, void *v) +{ + struct pds_vdpa_vq_info *vq = seq->private; + + seq_printf(seq, "ready: %d\n", vq->ready); + seq_printf(seq, "desc_addr: %#llx\n", vq->desc_addr); + seq_printf(seq, "avail_addr: %#llx\n", vq->avail_addr); + seq_printf(seq, "used_addr: %#llx\n", vq->used_addr); + seq_printf(seq, "q_len: %d\n", vq->q_len); + seq_printf(seq, "qid: %d\n", vq->qid); + + seq_printf(seq, "doorbell: %#llx\n", vq->doorbell); + seq_printf(seq, "avail_idx: %d\n", vq->avail_idx); + seq_printf(seq, "used_idx: %d\n", vq->used_idx); + seq_printf(seq, "irq: %d\n", vq->irq); + seq_printf(seq, "irq-name: %s\n", vq->irq_name); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vq); + +void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux) +{ + int i; + + debugfs_create_file("config", 0400, vdpa_aux->dentry, vdpa_aux->pdsv, &config_fops); + + for (i = 0; i < vdpa_aux->pdsv->num_vqs; i++) { + char name[8]; + + snprintf(name, sizeof(name), "vq%02d", i); + debugfs_create_file(name, 0400, vdpa_aux->dentry, + &vdpa_aux->pdsv->vqs[i], &vq_fops); + } +} + +void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_aux *vdpa_aux) +{ + debugfs_remove_recursive(vdpa_aux->dentry); + vdpa_aux->dentry = NULL; +} + +void pds_vdpa_debugfs_reset_vdpadev(struct pds_vdpa_aux *vdpa_aux) +{ + /* we don't keep track of the entries, so remove it all + * then rebuild the basics + */ + pds_vdpa_debugfs_del_vdpadev(vdpa_aux); + pds_vdpa_debugfs_add_pcidev(vdpa_aux); + pds_vdpa_debugfs_add_ident(vdpa_aux); +} diff --git a/drivers/vdpa/pds/debugfs.h b/drivers/vdpa/pds/debugfs.h new file mode 100644 index 000000000000..c088a4e8f1e9 --- /dev/null +++ b/drivers/vdpa/pds/debugfs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _PDS_VDPA_DEBUGFS_H_ +#define _PDS_VDPA_DEBUGFS_H_ + +#include <linux/debugfs.h> + +void pds_vdpa_debugfs_create(void); +void pds_vdpa_debugfs_destroy(void); +void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_aux *vdpa_aux); +void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux); +void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux); +void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_aux *vdpa_aux); +void pds_vdpa_debugfs_reset_vdpadev(struct pds_vdpa_aux *vdpa_aux); + +#endif /* _PDS_VDPA_DEBUGFS_H_ */ diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c new file mode 100644 index 000000000000..5071a4d58f8d --- /dev/null +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -0,0 +1,769 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#include <linux/pci.h> +#include <linux/vdpa.h> +#include <uapi/linux/vdpa.h> +#include <linux/virtio_pci_modern.h> + +#include <linux/pds/pds_common.h> +#include <linux/pds/pds_core_if.h> +#include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_auxbus.h> + +#include "vdpa_dev.h" +#include "aux_drv.h" +#include "cmds.h" 
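An aside on the ``PRINT_SBIT_NAME`` macro in debugfs.c above: ``#__name`` stringizes the macro argument, and ``&...[16]`` skips the fixed 16-character ``VIRTIO_CONFIG_S_`` prefix, so only the short bit name is printed. A standalone sketch of the same stringize-and-offset idiom; the ``PRINT_BIT`` macro and the two flag values here are illustrative, not from the driver::

	#include <stdio.h>

	#define VIRTIO_CONFIG_S_ACKNOWLEDGE	1
	#define VIRTIO_CONFIG_S_DRIVER		2

	/* #flag expands to the string "VIRTIO_CONFIG_S_..."; &"..."[16]
	 * points just past the 16-character "VIRTIO_CONFIG_S_" prefix.
	 */
	#define PRINT_BIT(f, flag)				\
		do {						\
			if ((f) & (flag))			\
				printf(" %s", &#flag[16]);	\
		} while (0)

	int main(void)
	{
		unsigned int status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
				      VIRTIO_CONFIG_S_DRIVER;

		printf("status:");
		PRINT_BIT(status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
		PRINT_BIT(status, VIRTIO_CONFIG_S_DRIVER);
		printf("\n");	/* prints: status: ACKNOWLEDGE DRIVER */
		return 0;
	}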
+#include "debugfs.h" + +static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev); + +static struct pds_vdpa_device *vdpa_to_pdsv(struct vdpa_device *vdpa_dev) +{ + return container_of(vdpa_dev, struct pds_vdpa_device, vdpa_dev); +} + +static int pds_vdpa_notify_handler(struct notifier_block *nb, + unsigned long ecode, + void *data) +{ + struct pds_vdpa_device *pdsv = container_of(nb, struct pds_vdpa_device, nb); + struct device *dev = &pdsv->vdpa_aux->padev->aux_dev.dev; + + dev_dbg(dev, "%s: event code %lu\n", __func__, ecode); + + if (ecode == PDS_EVENT_RESET || ecode == PDS_EVENT_LINK_CHANGE) { + if (pdsv->config_cb.callback) + pdsv->config_cb.callback(pdsv->config_cb.private); + } + + return 0; +} + +static int pds_vdpa_register_event_handler(struct pds_vdpa_device *pdsv) +{ + struct device *dev = &pdsv->vdpa_aux->padev->aux_dev.dev; + struct notifier_block *nb = &pdsv->nb; + int err; + + if (!nb->notifier_call) { + nb->notifier_call = pds_vdpa_notify_handler; + err = pdsc_register_notify(nb); + if (err) { + nb->notifier_call = NULL; + dev_err(dev, "failed to register pds event handler: %ps\n", + ERR_PTR(err)); + return -EINVAL; + } + dev_dbg(dev, "pds event handler registered\n"); + } + + return 0; +} + +static void pds_vdpa_unregister_event_handler(struct pds_vdpa_device *pdsv) +{ + if (pdsv->nb.notifier_call) { + pdsc_unregister_notify(&pdsv->nb); + pdsv->nb.notifier_call = NULL; + } +} + +static int pds_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, + u64 desc_addr, u64 driver_addr, u64 device_addr) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + pdsv->vqs[qid].desc_addr = desc_addr; + pdsv->vqs[qid].avail_addr = driver_addr; + pdsv->vqs[qid].used_addr = device_addr; + + return 0; +} + +static void pds_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, u32 num) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + pdsv->vqs[qid].q_len = num; +} + +static void pds_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + iowrite16(qid, pdsv->vqs[qid].notify); +} + +static void pds_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_callback *cb) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + pdsv->vqs[qid].event_cb = *cb; +} + +static irqreturn_t pds_vdpa_isr(int irq, void *data) +{ + struct pds_vdpa_vq_info *vq; + + vq = data; + if (vq->event_cb.callback) + vq->event_cb.callback(vq->event_cb.private); + + return IRQ_HANDLED; +} + +static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid) +{ + if (pdsv->vqs[qid].irq == VIRTIO_MSI_NO_VECTOR) + return; + + free_irq(pdsv->vqs[qid].irq, &pdsv->vqs[qid]); + pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR; +} + +static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; + struct device *dev = &pdsv->vdpa_dev.dev; + u64 driver_features; + u16 invert_idx = 0; + int irq; + int err; + + dev_dbg(dev, "%s: qid %d ready %d => %d\n", + __func__, qid, pdsv->vqs[qid].ready, ready); + if (ready == pdsv->vqs[qid].ready) + return; + + driver_features = pds_vdpa_get_driver_features(vdpa_dev); + if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) + invert_idx = PDS_VDPA_PACKED_INVERT_IDX; + + if (ready) { + irq = pci_irq_vector(pdev, qid); + snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), + "vdpa-%s-%d", dev_name(dev), qid); + + err = 
request_irq(irq, pds_vdpa_isr, 0, + pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]); + if (err) { + dev_err(dev, "%s: no irq for qid %d: %pe\n", + __func__, qid, ERR_PTR(err)); + return; + } + pdsv->vqs[qid].irq = irq; + + /* Pass vq setup info to DSC using adminq to gather up and + * send all info at once so FW can do its full set up in + * one easy operation + */ + err = pds_vdpa_cmd_init_vq(pdsv, qid, invert_idx, &pdsv->vqs[qid]); + if (err) { + dev_err(dev, "Failed to init vq %d: %pe\n", + qid, ERR_PTR(err)); + pds_vdpa_release_irq(pdsv, qid); + ready = false; + } + } else { + err = pds_vdpa_cmd_reset_vq(pdsv, qid, invert_idx, &pdsv->vqs[qid]); + if (err) + dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", + __func__, qid, ERR_PTR(err)); + pds_vdpa_release_irq(pdsv, qid); + } + + pdsv->vqs[qid].ready = ready; +} + +static bool pds_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + return pdsv->vqs[qid].ready; +} + +static int pds_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + const struct vdpa_vq_state *state) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + u64 driver_features; + u16 avail; + u16 used; + + if (pdsv->vqs[qid].ready) { + dev_err(dev, "Setting device position is denied while vq is enabled\n"); + return -EINVAL; + } + + driver_features = pds_vdpa_get_driver_features(vdpa_dev); + if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { + avail = state->packed.last_avail_idx | + (state->packed.last_avail_counter << 15); + used = state->packed.last_used_idx | + (state->packed.last_used_counter << 15); + + /* The avail and used index are stored with the packed wrap + * counter bit inverted. This way, in case set_vq_state is + * not called, the initial value can be set to zero prior to + * feature negotiation, and it is good for both packed and + * split vq. + */ + avail ^= PDS_VDPA_PACKED_INVERT_IDX; + used ^= PDS_VDPA_PACKED_INVERT_IDX; + } else { + avail = state->split.avail_index; + /* state->split does not provide a used_index: + * the vq will be set to "empty" here, and the vq will read + * the current used index the next time the vq is kicked. + */ + used = avail; + } + + if (used != avail) { + dev_dbg(dev, "Setting used equal to avail, for interoperability\n"); + used = avail; + } + + pdsv->vqs[qid].avail_idx = avail; + pdsv->vqs[qid].used_idx = used; + + return 0; +} + +static int pds_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev; + struct device *dev = &padev->aux_dev.dev; + u64 driver_features; + u16 avail; + u16 used; + + if (pdsv->vqs[qid].ready) { + dev_err(dev, "Getting device position is denied while vq is enabled\n"); + return -EINVAL; + } + + avail = pdsv->vqs[qid].avail_idx; + used = pdsv->vqs[qid].used_idx; + + driver_features = pds_vdpa_get_driver_features(vdpa_dev); + if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { + avail ^= PDS_VDPA_PACKED_INVERT_IDX; + used ^= PDS_VDPA_PACKED_INVERT_IDX; + + state->packed.last_avail_idx = avail & 0x7fff; + state->packed.last_avail_counter = avail >> 15; + state->packed.last_used_idx = used & 0x7fff; + state->packed.last_used_counter = used >> 15; + } else { + state->split.avail_index = avail; + /* state->split does not provide a used_index. 
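Reporting avail alone is sufficient here: set_vq_state() above forces the used index to match avail when a split vq is restored. 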
*/ + } + + return 0; +} + +static struct vdpa_notification_area +pds_vdpa_get_vq_notification(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct virtio_pci_modern_device *vd_mdev; + struct vdpa_notification_area area; + + area.addr = pdsv->vqs[qid].notify_pa; + + vd_mdev = &pdsv->vdpa_aux->vd_mdev; + if (!vd_mdev->notify_offset_multiplier) + area.size = PDS_PAGE_SIZE; + else + area.size = vd_mdev->notify_offset_multiplier; + + return area; +} + +static int pds_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + return pdsv->vqs[qid].irq; +} + +static u32 pds_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) +{ + return PDS_PAGE_SIZE; +} + +static u32 pds_vdpa_get_vq_group(struct vdpa_device *vdpa_dev, u16 idx) +{ + return 0; +} + +static u64 pds_vdpa_get_device_features(struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + return pdsv->supported_features; +} + +static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct device *dev = &pdsv->vdpa_dev.dev; + u64 driver_features; + u64 nego_features; + u64 missing; + + if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { + dev_err(dev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n"); + return -EOPNOTSUPP; + } + + pdsv->req_features = features; + + /* Check for valid feature bits */ + nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); + missing = pdsv->req_features & ~nego_features; + if (missing) { + dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n", + pdsv->req_features, missing); + return -EOPNOTSUPP; + } + + driver_features = pds_vdpa_get_driver_features(vdpa_dev); + dev_dbg(dev, "%s: %#llx => %#llx\n", + __func__, driver_features, nego_features); + + if (driver_features == nego_features) + return 0; + + vp_modern_set_features(&pdsv->vdpa_aux->vd_mdev, nego_features); + + return 0; +} + +static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); +} + +static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, + struct vdpa_callback *cb) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + pdsv->config_cb.callback = cb->callback; + pdsv->config_cb.private = cb->private; +} + +static u16 pds_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + /* qemu has assert() that vq_num_max <= VIRTQUEUE_MAX_SIZE (1024) */ + return min_t(u16, 1024, BIT(le16_to_cpu(pdsv->vdpa_aux->ident.max_qlen))); +} + +static u32 pds_vdpa_get_device_id(struct vdpa_device *vdpa_dev) +{ + return VIRTIO_ID_NET; +} + +static u32 pds_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) +{ + return PCI_VENDOR_ID_PENSANDO; +} + +static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + + return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); +} + +static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct device *dev = &pdsv->vdpa_dev.dev; + u8 old_status; + int i; + + old_status = pds_vdpa_get_status(vdpa_dev); + dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status); + + 
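/* Push the new status to the DSC first; the reset handling and the + * notify remapping below assume the device has already seen it + */ + 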
pds_vdpa_cmd_set_status(pdsv, status); + + /* Note: still working with FW on the need for this reset cmd */ + if (status == 0) { + pds_vdpa_cmd_reset(pdsv); + + for (i = 0; i < pdsv->num_vqs; i++) { + pdsv->vqs[i].avail_idx = 0; + pdsv->vqs[i].used_idx = 0; + } + } + + if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) { + for (i = 0; i < pdsv->num_vqs; i++) { + pdsv->vqs[i].notify = + vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, + i, &pdsv->vqs[i].notify_pa); + } + } +} + +static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct device *dev; + int err = 0; + u8 status; + int i; + + dev = &pdsv->vdpa_aux->padev->aux_dev.dev; + status = pds_vdpa_get_status(vdpa_dev); + + if (status == 0) + return 0; + + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + /* Reset the vqs */ + for (i = 0; i < pdsv->num_vqs && !err; i++) { + err = pds_vdpa_cmd_reset_vq(pdsv, i, 0, &pdsv->vqs[i]); + if (err) + dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", + __func__, i, ERR_PTR(err)); + pds_vdpa_release_irq(pdsv, i); + memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0])); + pdsv->vqs[i].ready = false; + } + } + + pds_vdpa_set_status(vdpa_dev, 0); + + return 0; +} + +static size_t pds_vdpa_get_config_size(struct vdpa_device *vdpa_dev) +{ + return sizeof(struct virtio_net_config); +} + +static void pds_vdpa_get_config(struct vdpa_device *vdpa_dev, + unsigned int offset, + void *buf, unsigned int len) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + void __iomem *device; + + if (offset + len > sizeof(struct virtio_net_config)) { + WARN(true, "%s: bad read, offset %d len %d\n", __func__, offset, len); + return; + } + + device = pdsv->vdpa_aux->vd_mdev.device; + memcpy_fromio(buf, device + offset, len); +} + +static void pds_vdpa_set_config(struct vdpa_device *vdpa_dev, + unsigned int offset, const void *buf, + unsigned int len) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + void __iomem *device; + + if (offset + len > sizeof(struct virtio_net_config)) { + WARN(true, "%s: bad write, offset %d len %d\n", __func__, offset, len); + return; + } + + device = pdsv->vdpa_aux->vd_mdev.device; + memcpy_toio(device + offset, buf, len); +} + +static const struct vdpa_config_ops pds_vdpa_ops = { + .set_vq_address = pds_vdpa_set_vq_address, + .set_vq_num = pds_vdpa_set_vq_num, + .kick_vq = pds_vdpa_kick_vq, + .set_vq_cb = pds_vdpa_set_vq_cb, + .set_vq_ready = pds_vdpa_set_vq_ready, + .get_vq_ready = pds_vdpa_get_vq_ready, + .set_vq_state = pds_vdpa_set_vq_state, + .get_vq_state = pds_vdpa_get_vq_state, + .get_vq_notification = pds_vdpa_get_vq_notification, + .get_vq_irq = pds_vdpa_get_vq_irq, + .get_vq_align = pds_vdpa_get_vq_align, + .get_vq_group = pds_vdpa_get_vq_group, + + .get_device_features = pds_vdpa_get_device_features, + .set_driver_features = pds_vdpa_set_driver_features, + .get_driver_features = pds_vdpa_get_driver_features, + .set_config_cb = pds_vdpa_set_config_cb, + .get_vq_num_max = pds_vdpa_get_vq_num_max, + .get_device_id = pds_vdpa_get_device_id, + .get_vendor_id = pds_vdpa_get_vendor_id, + .get_status = pds_vdpa_get_status, + .set_status = pds_vdpa_set_status, + .reset = pds_vdpa_reset, + .get_config_size = pds_vdpa_get_config_size, + .get_config = pds_vdpa_get_config, + .set_config = pds_vdpa_set_config, +}; +static struct virtio_device_id pds_vdpa_id_table[] = { + {VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID}, + {0}, +}; + +static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct 
vdpa_dev_set_config *add_config) +{ + struct pds_vdpa_aux *vdpa_aux; + struct pds_vdpa_device *pdsv; + struct vdpa_mgmt_dev *mgmt; + u16 fw_max_vqs, vq_pairs; + struct device *dma_dev; + struct pci_dev *pdev; + struct device *dev; + u8 mac[ETH_ALEN]; + int err; + int i; + + vdpa_aux = container_of(mdev, struct pds_vdpa_aux, vdpa_mdev); + dev = &vdpa_aux->padev->aux_dev.dev; + mgmt = &vdpa_aux->vdpa_mdev; + + if (vdpa_aux->pdsv) { + dev_warn(dev, "Multiple vDPA devices on a VF are not supported.\n"); + return -EOPNOTSUPP; + } + + pdsv = vdpa_alloc_device(struct pds_vdpa_device, vdpa_dev, + dev, &pds_vdpa_ops, 1, 1, name, false); + if (IS_ERR(pdsv)) { + dev_err(dev, "Failed to allocate vDPA structure: %pe\n", pdsv); + return PTR_ERR(pdsv); + } + + vdpa_aux->pdsv = pdsv; + pdsv->vdpa_aux = vdpa_aux; + + pdev = vdpa_aux->padev->vf_pdev; + dma_dev = &pdev->dev; + pdsv->vdpa_dev.dma_dev = dma_dev; + + pdsv->supported_features = mgmt->supported_features; + + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + u64 unsupp_features = + add_config->device_features & ~mgmt->supported_features; + + if (unsupp_features) { + dev_err(dev, "Unsupported features: %#llx\n", unsupp_features); + err = -EOPNOTSUPP; + goto err_unmap; + } + + pdsv->supported_features = add_config->device_features; + } + + err = pds_vdpa_cmd_reset(pdsv); + if (err) { + dev_err(dev, "Failed to reset hw: %pe\n", ERR_PTR(err)); + goto err_unmap; + } + + err = pds_vdpa_init_hw(pdsv); + if (err) { + dev_err(dev, "Failed to init hw: %pe\n", ERR_PTR(err)); + goto err_unmap; + } + + fw_max_vqs = le16_to_cpu(pdsv->vdpa_aux->ident.max_vqs); + vq_pairs = fw_max_vqs / 2; + + /* Make sure we have the queues being requested */ + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) + vq_pairs = add_config->net.max_vq_pairs; + + pdsv->num_vqs = 2 * vq_pairs; + if (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + pdsv->num_vqs++; + + if (pdsv->num_vqs > fw_max_vqs) { + dev_err(dev, "%s: queue count requested %u greater than max %u\n", + __func__, pdsv->num_vqs, fw_max_vqs); + err = -ENOSPC; + goto err_unmap; + } + + if (pdsv->num_vqs != fw_max_vqs) { + err = pds_vdpa_cmd_set_max_vq_pairs(pdsv, vq_pairs); + if (err) { + dev_err(dev, "Failed to set max_vq_pairs: %pe\n", + ERR_PTR(err)); + goto err_unmap; + } + } + + /* Set a mac: use the mac from the user config if provided, + * else keep the device's current mac, replacing an all-zeros + * mac with a random address + */ + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + ether_addr_copy(mac, add_config->net.mac); + pds_vdpa_cmd_set_mac(pdsv, mac); + } else { + struct virtio_net_config __iomem *vc; + + vc = pdsv->vdpa_aux->vd_mdev.device; + memcpy_fromio(mac, vc->mac, sizeof(mac)); + if (is_zero_ether_addr(mac)) { + eth_random_addr(mac); + dev_info(dev, "setting random mac %pM\n", mac); + pds_vdpa_cmd_set_mac(pdsv, mac); + } + } + + for (i = 0; i < pdsv->num_vqs; i++) { + pdsv->vqs[i].qid = i; + pdsv->vqs[i].pdsv = pdsv; + pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR; + pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, + i, &pdsv->vqs[i].notify_pa); + } + + pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev; + + err = pds_vdpa_register_event_handler(pdsv); + if (err) { + dev_err(dev, "Failed to register for PDS events: %pe\n", ERR_PTR(err)); + goto err_unmap; + } + + /* We use the _vdpa_register_device() call rather than the + * vdpa_register_device() to avoid a deadlock because our + * dev_add() is called with the vdpa_dev_lock already held + * by vdpa_nl_cmd_dev_add_set_doit() + */ + err = 
_vdpa_register_device(&pdsv->vdpa_dev, pdsv->num_vqs); + if (err) { + dev_err(dev, "Failed to register to vDPA bus: %pe\n", ERR_PTR(err)); + goto err_unevent; + } + + pds_vdpa_debugfs_add_vdpadev(vdpa_aux); + + return 0; + +err_unevent: + pds_vdpa_unregister_event_handler(pdsv); +err_unmap: + put_device(&pdsv->vdpa_dev.dev); + vdpa_aux->pdsv = NULL; + return err; +} + +static void pds_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, + struct vdpa_device *vdpa_dev) +{ + struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); + struct pds_vdpa_aux *vdpa_aux; + + pds_vdpa_unregister_event_handler(pdsv); + + vdpa_aux = container_of(mdev, struct pds_vdpa_aux, vdpa_mdev); + _vdpa_unregister_device(vdpa_dev); + + pds_vdpa_cmd_reset(vdpa_aux->pdsv); + pds_vdpa_debugfs_reset_vdpadev(vdpa_aux); + + vdpa_aux->pdsv = NULL; + + dev_info(&vdpa_aux->padev->aux_dev.dev, "Removed vdpa device\n"); +} + +static const struct vdpa_mgmtdev_ops pds_vdpa_mgmt_dev_ops = { + .dev_add = pds_vdpa_dev_add, + .dev_del = pds_vdpa_dev_del +}; + +int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) +{ + union pds_core_adminq_cmd cmd = { + .vdpa_ident.opcode = PDS_VDPA_CMD_IDENT, + .vdpa_ident.vf_id = cpu_to_le16(vdpa_aux->vf_id), + }; + union pds_core_adminq_comp comp = {}; + struct vdpa_mgmt_dev *mgmt; + struct pci_dev *pf_pdev; + struct device *pf_dev; + struct pci_dev *pdev; + dma_addr_t ident_pa; + struct device *dev; + u16 dev_intrs; + u16 max_vqs; + int err; + + dev = &vdpa_aux->padev->aux_dev.dev; + pdev = vdpa_aux->padev->vf_pdev; + mgmt = &vdpa_aux->vdpa_mdev; + + /* Get resource info through the PF's adminq. It is a block of info, + * so we need to map some memory that the PF can hand to the firmware + * to write the data into. + */ + pf_pdev = pci_physfn(vdpa_aux->padev->vf_pdev); + pf_dev = &pf_pdev->dev; + ident_pa = dma_map_single(pf_dev, &vdpa_aux->ident, + sizeof(vdpa_aux->ident), DMA_FROM_DEVICE); + if (dma_mapping_error(pf_dev, ident_pa)) { + dev_err(dev, "Failed to map ident space\n"); + return -ENOMEM; + } + + cmd.vdpa_ident.ident_pa = cpu_to_le64(ident_pa); + cmd.vdpa_ident.len = cpu_to_le32(sizeof(vdpa_aux->ident)); + err = pds_client_adminq_cmd(vdpa_aux->padev, &cmd, + sizeof(cmd.vdpa_ident), &comp, 0); + dma_unmap_single(pf_dev, ident_pa, + sizeof(vdpa_aux->ident), DMA_FROM_DEVICE); + if (err) { + dev_err(dev, "Failed to ident hw, status %d: %pe\n", + comp.status, ERR_PTR(err)); + return err; + } + + max_vqs = le16_to_cpu(vdpa_aux->ident.max_vqs); + dev_intrs = pci_msix_vec_count(pdev); + dev_dbg(dev, "ident.max_vqs %d dev_intrs %d\n", max_vqs, dev_intrs); + + max_vqs = min_t(u16, dev_intrs, max_vqs); + mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs); + vdpa_aux->nintrs = mgmt->max_supported_vqs; + + mgmt->ops = &pds_vdpa_mgmt_dev_ops; + mgmt->id_table = pds_vdpa_id_table; + mgmt->device = dev; + mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features); + mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); + mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); + mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); + + err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs, + PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Couldn't get %d msix vectors: %pe\n", + vdpa_aux->nintrs, ERR_PTR(err)); + return err; + } + vdpa_aux->nintrs = err; + + return 0; +} diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h new file mode 100644 index 000000000000..a1bc37de9537 --- /dev/null +++ b/drivers/vdpa/pds/vdpa_dev.h @@ 
-0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _VDPA_DEV_H_ +#define _VDPA_DEV_H_ + +#include <linux/pci.h> +#include <linux/vdpa.h> + +struct pds_vdpa_vq_info { + bool ready; + u64 desc_addr; + u64 avail_addr; + u64 used_addr; + u32 q_len; + u16 qid; + int irq; + char irq_name[32]; + + void __iomem *notify; + dma_addr_t notify_pa; + + u64 doorbell; + u16 avail_idx; + u16 used_idx; + + struct vdpa_callback event_cb; + struct pds_vdpa_device *pdsv; +}; + +#define PDS_VDPA_MAX_QUEUES 65 +#define PDS_VDPA_MAX_QLEN 32768 +struct pds_vdpa_device { + struct vdpa_device vdpa_dev; + struct pds_vdpa_aux *vdpa_aux; + + struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES]; + u64 supported_features; /* specified device features */ + u64 req_features; /* features requested by vdpa */ + u8 vdpa_index; /* rsvd for future subdevice use */ + u8 num_vqs; /* num vqs in use */ + struct vdpa_callback config_cb; + struct notifier_block nb; +}; + +#define PDS_VDPA_PACKED_INVERT_IDX 0x8000 + +int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux); +#endif /* _VDPA_DEV_H_ */ diff --git a/drivers/vdpa/solidrun/snet_ctrl.c b/drivers/vdpa/solidrun/snet_ctrl.c index 3858738643b4..3cef2571d15d 100644 --- a/drivers/vdpa/solidrun/snet_ctrl.c +++ b/drivers/vdpa/solidrun/snet_ctrl.c @@ -16,6 +16,7 @@ enum snet_ctrl_opcodes { SNET_CTRL_OP_DESTROY = 1, SNET_CTRL_OP_READ_VQ_STATE, SNET_CTRL_OP_SUSPEND, + SNET_CTRL_OP_RESUME, }; #define SNET_CTRL_TIMEOUT 2000000 @@ -328,3 +329,8 @@ int snet_suspend_dev(struct snet *snet) { return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0); } + +int snet_resume_dev(struct snet *snet) +{ + return snet_send_ctrl_msg(snet, SNET_CTRL_OP_RESUME, 0); +} diff --git a/drivers/vdpa/solidrun/snet_hwmon.c b/drivers/vdpa/solidrun/snet_hwmon.c index 42c87387a0f1..af531a339082 100644 --- a/drivers/vdpa/solidrun/snet_hwmon.c +++ b/drivers/vdpa/solidrun/snet_hwmon.c @@ -159,7 +159,7 @@ static const struct hwmon_ops snet_hwmon_ops = { .read_string = snet_hwmon_read_string }; -static const struct hwmon_channel_info *snet_hwmon_info[] = { +static const struct hwmon_channel_info * const snet_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL), HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL), diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index cdcd84ce4f5a..99428a04068d 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -509,6 +509,20 @@ static int snet_suspend(struct vdpa_device *vdev) return ret; } +static int snet_resume(struct vdpa_device *vdev) +{ + struct snet *snet = vdpa_to_snet(vdev); + int ret; + + ret = snet_resume_dev(snet); + if (ret) + SNET_ERR(snet->pdev, "SNET[%u] resume failed, err: %d\n", snet->sid, ret); + else + SNET_DBG(snet->pdev, "Resume SNET[%u] device\n", snet->sid); + + return ret; +} + static const struct vdpa_config_ops snet_config_ops = { .set_vq_address = snet_set_vq_address, .set_vq_num = snet_set_vq_num, @@ -536,6 +550,7 @@ static const struct vdpa_config_ops snet_config_ops = { .get_config = snet_get_config, .set_config = snet_set_config, .suspend = snet_suspend, + .resume = snet_resume, }; static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) diff --git a/drivers/vdpa/solidrun/snet_vdpa.h b/drivers/vdpa/solidrun/snet_vdpa.h index 3c78d4e7d485..36ac285835ea 100644 --- a/drivers/vdpa/solidrun/snet_vdpa.h +++ 
b/drivers/vdpa/solidrun/snet_vdpa.h @@ -204,5 +204,6 @@ void snet_ctrl_clear(struct snet *snet); int snet_destroy_dev(struct snet *snet); int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state); int snet_suspend_dev(struct snet *snet); +int snet_resume_dev(struct snet *snet); #endif //_SNET_VDPA_H_ diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 4619b4a520ef..dc38ed21319d 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -726,7 +726,11 @@ static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx, { struct vduse_dev *dev = vdpa_to_vduse(vdpa); - cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask); + if (cpu_mask) + cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask); + else + cpumask_setall(&dev->vqs[idx]->irq_affinity); + return 0; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ae2273196b0c..f2ed7167c848 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -546,7 +546,7 @@ static void vhost_net_busy_poll(struct vhost_net *net, endtime = busy_clock() + busyloop_timeout; while (vhost_can_busy_poll(endtime)) { - if (vhost_has_work(&net->dev)) { + if (vhost_vq_has_work(vq)) { *busyloop_intr = true; break; } @@ -1347,8 +1347,10 @@ static int vhost_net_open(struct inode *inode, struct file *f) VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, + vqs[VHOST_NET_VQ_TX]); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, + vqs[VHOST_NET_VQ_RX]); f->private_data = n; n->page_frag.page = NULL; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index bb10fa4bb4f6..c83f7f043470 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -167,6 +167,7 @@ MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi d struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; + struct vhost_scsi *vs; /* * Reference counting for inflight reqs, used for flush operation. 
At * each time, one reference tracks new commands submitted, while we @@ -181,6 +182,9 @@ struct vhost_scsi_virtqueue { struct vhost_scsi_cmd *scsi_cmds; struct sbitmap scsi_tags; int max_cmds; + + struct vhost_work completion_work; + struct llist_head completion_list; }; struct vhost_scsi { @@ -190,12 +194,8 @@ struct vhost_scsi { struct vhost_dev dev; struct vhost_scsi_virtqueue *vqs; - unsigned long *compl_bitmap; struct vhost_scsi_inflight **old_inflight; - struct vhost_work vs_completion_work; /* cmd completion work item */ - struct llist_head vs_completion_list; /* cmd completion queue */ - struct vhost_work vs_event_work; /* evt injection work item */ struct llist_head vs_event_list; /* evt injection queue */ @@ -353,15 +353,17 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { struct vhost_scsi_tmf *tmf = container_of(se_cmd, struct vhost_scsi_tmf, se_cmd); + struct vhost_virtqueue *vq = &tmf->svq->vq; - vhost_work_queue(&tmf->vhost->dev, &tmf->vwork); + vhost_vq_work_queue(vq, &tmf->vwork); } else { struct vhost_scsi_cmd *cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); - struct vhost_scsi *vs = cmd->tvc_vhost; + struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq, + struct vhost_scsi_virtqueue, vq); - llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); - vhost_work_queue(&vs->dev, &vs->vs_completion_work); + llist_add(&cmd->tvc_completion_list, &svq->completion_list); + vhost_vq_work_queue(&svq->vq, &svq->completion_work); } } @@ -509,17 +511,17 @@ static void vhost_scsi_evt_work(struct vhost_work *work) */ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) { - struct vhost_scsi *vs = container_of(work, struct vhost_scsi, - vs_completion_work); + struct vhost_scsi_virtqueue *svq = container_of(work, + struct vhost_scsi_virtqueue, completion_work); struct virtio_scsi_cmd_resp v_rsp; struct vhost_scsi_cmd *cmd, *t; struct llist_node *llnode; struct se_cmd *se_cmd; struct iov_iter iov_iter; - int ret, vq; + bool signal = false; + int ret; - bitmap_zero(vs->compl_bitmap, vs->dev.nvqs); - llnode = llist_del_all(&vs->vs_completion_list); + llnode = llist_del_all(&svq->completion_list); llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -539,21 +541,17 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) cmd->tvc_in_iovs, sizeof(v_rsp)); ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter); if (likely(ret == sizeof(v_rsp))) { - struct vhost_scsi_virtqueue *q; + signal = true; + vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); - q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); - vq = q - vs->vqs; - __set_bit(vq, vs->compl_bitmap); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); vhost_scsi_release_cmd_res(se_cmd); } - vq = -1; - while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1)) - < vs->dev.nvqs) - vhost_signal(&vs->dev, &vs->vqs[vq].vq); + if (signal) + vhost_signal(&svq->vs->dev, &svq->vq); } static struct vhost_scsi_cmd * @@ -1135,12 +1133,27 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) { struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf, vwork); - int resp_code; + struct vhost_virtqueue *ctl_vq, *vq; + int resp_code, i; + + if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) { + /* + * Flush IO vqs that don't share a worker with the ctl to make + * sure they have sent their responses before us. 
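+ * vhost_vq_flush() queues a flush work behind whatever completion work + * is already pending on the vq's worker and waits for it, so those + * responses are posted before ours. 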
+ */ + ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq; + for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) { + vq = &tmf->vhost->vqs[i].vq; + + if (vhost_vq_is_setup(vq) && + vq->worker != ctl_vq->worker) + vhost_vq_flush(vq); + } - if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; - else + } else { resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; + } vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, tmf->vq_desc, &tmf->resp_iov, resp_code); @@ -1335,11 +1348,9 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) } static void -vhost_scsi_send_evt(struct vhost_scsi *vs, - struct vhost_scsi_tpg *tpg, - struct se_lun *lun, - u32 event, - u32 reason) +vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + struct vhost_scsi_tpg *tpg, struct se_lun *lun, + u32 event, u32 reason) { struct vhost_scsi_evt *evt; @@ -1361,7 +1372,7 @@ vhost_scsi_send_evt(struct vhost_scsi *vs, } llist_add(&evt->list, &vs->vs_event_list); - vhost_work_queue(&vs->dev, &vs->vs_event_work); + vhost_vq_work_queue(vq, &vs->vs_event_work); } static void vhost_scsi_evt_handle_kick(struct vhost_work *work) @@ -1375,7 +1386,8 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work) goto out; if (vs->vs_events_missed) - vhost_scsi_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); + vhost_scsi_send_evt(vs, vq, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, + 0); out: mutex_unlock(&vq->mutex); } @@ -1770,6 +1782,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) static int vhost_scsi_open(struct inode *inode, struct file *f) { + struct vhost_scsi_virtqueue *svq; struct vhost_scsi *vs; struct vhost_virtqueue **vqs; int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs; @@ -1788,10 +1801,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) } nvqs += VHOST_SCSI_VQ_IO; - vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL); - if (!vs->compl_bitmap) - goto err_compl_bitmap; - vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight), GFP_KERNEL | __GFP_ZERO); if (!vs->old_inflight) @@ -1806,7 +1815,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) if (!vqs) goto err_local_vqs; - vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); vs->vs_events_nr = 0; @@ -1817,8 +1825,14 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) { - vqs[i] = &vs->vqs[i].vq; - vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; + svq = &vs->vqs[i]; + + vqs[i] = &svq->vq; + svq->vs = vs; + init_llist_head(&svq->completion_list); + vhost_work_init(&svq->completion_work, + vhost_scsi_complete_cmd_work); + svq->vq.handle_kick = vhost_scsi_handle_kick; } vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV, VHOST_SCSI_WEIGHT, 0, true, NULL); @@ -1833,8 +1847,6 @@ err_local_vqs: err_vqs: kfree(vs->old_inflight); err_inflight: - bitmap_free(vs->compl_bitmap); -err_compl_bitmap: kvfree(vs); err_vs: return r; @@ -1854,7 +1866,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) kfree(vs->dev.vqs); kfree(vs->vqs); kfree(vs->old_inflight); - bitmap_free(vs->compl_bitmap); kvfree(vs); return 0; } @@ -1916,6 +1927,14 @@ vhost_scsi_ioctl(struct file *f, if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; return 
vhost_scsi_set_features(vs, features); + case VHOST_NEW_WORKER: + case VHOST_FREE_WORKER: + case VHOST_ATTACH_VRING_WORKER: + case VHOST_GET_VRING_WORKER: + mutex_lock(&vs->dev.mutex); + r = vhost_worker_ioctl(&vs->dev, ioctl, argp); + mutex_unlock(&vs->dev.mutex); + return r; default: mutex_lock(&vs->dev.mutex); r = vhost_dev_ioctl(&vs->dev, ioctl, argp); @@ -1995,7 +2014,7 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg, goto unlock; if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG)) - vhost_scsi_send_evt(vs, tpg, lun, + vhost_scsi_send_evt(vs, vq, tpg, lun, VIRTIO_SCSI_T_TRANSPORT_RESET, reason); unlock: mutex_unlock(&vq->mutex); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 60c9ebd629dd..c71d573f1c94 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -187,13 +187,15 @@ EXPORT_SYMBOL_GPL(vhost_work_init); /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, - __poll_t mask, struct vhost_dev *dev) + __poll_t mask, struct vhost_dev *dev, + struct vhost_virtqueue *vq) { init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; poll->wqh = NULL; + poll->vq = vq; vhost_work_init(&poll->work, fn); } @@ -231,46 +233,102 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -void vhost_dev_flush(struct vhost_dev *dev) +static void vhost_worker_queue(struct vhost_worker *worker, + struct vhost_work *work) +{ + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { + /* We can only add the work to the list after we're + * sure it was not in the list. + * test_and_set_bit() implies a memory barrier. + */ + llist_add(&work->node, &worker->work_list); + vhost_task_wake(worker->vtsk); + } +} + +bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) +{ + struct vhost_worker *worker; + bool queued = false; + + rcu_read_lock(); + worker = rcu_dereference(vq->worker); + if (worker) { + queued = true; + vhost_worker_queue(worker, work); + } + rcu_read_unlock(); + + return queued; +} +EXPORT_SYMBOL_GPL(vhost_vq_work_queue); + +void vhost_vq_flush(struct vhost_virtqueue *vq) { struct vhost_flush_struct flush; - if (dev->worker.vtsk) { - init_completion(&flush.wait_event); - vhost_work_init(&flush.work, vhost_flush_work); + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); - vhost_work_queue(dev, &flush.work); + if (vhost_vq_work_queue(vq, &flush.work)) wait_for_completion(&flush.wait_event); - } } -EXPORT_SYMBOL_GPL(vhost_dev_flush); +EXPORT_SYMBOL_GPL(vhost_vq_flush); -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) +/** + * vhost_worker_flush - flush a worker + * @worker: worker to flush + * + * This does not use RCU to protect the worker, so the device or worker + * mutex must be held. + */ +static void vhost_worker_flush(struct vhost_worker *worker) { - if (!dev->worker.vtsk) - return; + struct vhost_flush_struct flush; - if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { - /* We can only add the work to the list after we're - * sure it was not in the list. - * test_and_set_bit() implies a memory barrier. 
- */ - llist_add(&work->node, &dev->worker.work_list); - vhost_task_wake(dev->worker.vtsk); + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); + + vhost_worker_queue(worker, &flush.work); + wait_for_completion(&flush.wait_event); +} + +void vhost_dev_flush(struct vhost_dev *dev) +{ + struct vhost_worker *worker; + unsigned long i; + + xa_for_each(&dev->worker_xa, i, worker) { + mutex_lock(&worker->mutex); + if (!worker->attachment_cnt) { + mutex_unlock(&worker->mutex); + continue; + } + vhost_worker_flush(worker); + mutex_unlock(&worker->mutex); } } -EXPORT_SYMBOL_GPL(vhost_work_queue); +EXPORT_SYMBOL_GPL(vhost_dev_flush); /* A lockless hint for busy polling code to exit the loop */ -bool vhost_has_work(struct vhost_dev *dev) +bool vhost_vq_has_work(struct vhost_virtqueue *vq) { - return !llist_empty(&dev->worker.work_list); + struct vhost_worker *worker; + bool has_work = false; + + rcu_read_lock(); + worker = rcu_dereference(vq->worker); + if (worker && !llist_empty(&worker->work_list)) + has_work = true; + rcu_read_unlock(); + + return has_work; } -EXPORT_SYMBOL_GPL(vhost_has_work); +EXPORT_SYMBOL_GPL(vhost_vq_has_work); void vhost_poll_queue(struct vhost_poll *poll) { - vhost_work_queue(poll->dev, &poll->work); + vhost_vq_work_queue(poll->vq, &poll->work); } EXPORT_SYMBOL_GPL(vhost_poll_queue); @@ -329,6 +387,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + rcu_assign_pointer(vq->worker, NULL); vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } @@ -458,8 +517,6 @@ void vhost_dev_init(struct vhost_dev *dev, dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; - memset(&dev->worker, 0, sizeof(dev->worker)); - init_llist_head(&dev->worker.work_list); dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; @@ -469,7 +526,7 @@ void vhost_dev_init(struct vhost_dev *dev, INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); - + xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -481,7 +538,7 @@ void vhost_dev_init(struct vhost_dev *dev, vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, - EPOLLIN, dev); + EPOLLIN, dev, vq); } } EXPORT_SYMBOL_GPL(vhost_dev_init); @@ -531,38 +588,284 @@ static void vhost_detach_mm(struct vhost_dev *dev) dev->mm = NULL; } -static void vhost_worker_free(struct vhost_dev *dev) +static void vhost_worker_destroy(struct vhost_dev *dev, + struct vhost_worker *worker) +{ + if (!worker) + return; + + WARN_ON(!llist_empty(&worker->work_list)); + xa_erase(&dev->worker_xa, worker->id); + vhost_task_stop(worker->vtsk); + kfree(worker); +} + +static void vhost_workers_free(struct vhost_dev *dev) { - if (!dev->worker.vtsk) + struct vhost_worker *worker; + unsigned long i; + + if (!dev->use_worker) return; - WARN_ON(!llist_empty(&dev->worker.work_list)); - vhost_task_stop(dev->worker.vtsk); - dev->worker.kcov_handle = 0; - dev->worker.vtsk = NULL; + for (i = 0; i < dev->nvqs; i++) + rcu_assign_pointer(dev->vqs[i]->worker, NULL); + /* + * Free the default worker we created and cleanup workers userspace + * created but couldn't clean up (it forgot or crashed). 
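+ * The vq worker pointers were cleared above and the device is being + * released, so no new work should be queued to these workers by now. 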
+ */ + xa_for_each(&dev->worker_xa, i, worker) + vhost_worker_destroy(dev, worker); + xa_destroy(&dev->worker_xa); } -static int vhost_worker_create(struct vhost_dev *dev) +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { + struct vhost_worker *worker; struct vhost_task *vtsk; char name[TASK_COMM_LEN]; + int ret; + u32 id; + + worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); + if (!worker) + return NULL; snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_worker, &dev->worker, name); + vtsk = vhost_task_create(vhost_worker, worker, name); if (!vtsk) - return -ENOMEM; + goto free_worker; + + mutex_init(&worker->mutex); + init_llist_head(&worker->work_list); + worker->kcov_handle = kcov_common_handle(); + worker->vtsk = vtsk; - dev->worker.kcov_handle = kcov_common_handle(); - dev->worker.vtsk = vtsk; vhost_task_start(vtsk); + + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + if (ret < 0) + goto stop_worker; + worker->id = id; + + return worker; + +stop_worker: + vhost_task_stop(vtsk); +free_worker: + kfree(worker); + return NULL; +} + +/* Caller must have device mutex */ +static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, + struct vhost_worker *worker) +{ + struct vhost_worker *old_worker; + + old_worker = rcu_dereference_check(vq->worker, + lockdep_is_held(&vq->dev->mutex)); + + mutex_lock(&worker->mutex); + worker->attachment_cnt++; + mutex_unlock(&worker->mutex); + rcu_assign_pointer(vq->worker, worker); + + if (!old_worker) + return; + /* + * Take the worker mutex to make sure we see the work queued from + * device-wide flushes, which don't use RCU for execution. + */ + mutex_lock(&old_worker->mutex); + old_worker->attachment_cnt--; + /* + * We don't want to call synchronize_rcu for every vq during setup + * because it will slow down VM startup. If we haven't done + * VHOST_SET_VRING_KICK or the driver-specific SET_ENDPOINT/RUNNING + * yet, we can skip the sync since there will be no work queued for + * scsi and net. + */ + mutex_lock(&vq->mutex); + if (!vhost_vq_get_backend(vq) && !vq->kick) { + mutex_unlock(&vq->mutex); + mutex_unlock(&old_worker->mutex); + /* + * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. + * Warn if it adds support for multiple workers but forgets to + * handle the early queueing case. 
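+ * (The WARN below catches work left queued on a worker that no vq + * references any longer.) 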
+ */ + WARN_ON(!old_worker->attachment_cnt && + !llist_empty(&old_worker->work_list)); + return; + } + mutex_unlock(&vq->mutex); + + /* Make sure new vq queue/flush/poll calls see the new worker */ + synchronize_rcu(); + /* Make sure whatever was queued gets run */ + vhost_worker_flush(old_worker); + mutex_unlock(&old_worker->mutex); +} + + /* Caller must have device mutex */ +static int vhost_vq_attach_worker(struct vhost_virtqueue *vq, + struct vhost_vring_worker *info) +{ + unsigned long index = info->worker_id; + struct vhost_dev *dev = vq->dev; + struct vhost_worker *worker; + + if (!dev->use_worker) + return -EINVAL; + + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); + if (!worker || worker->id != info->worker_id) + return -ENODEV; + + __vhost_vq_attach_worker(vq, worker); + return 0; +} + +/* Caller must have device mutex */ +static int vhost_new_worker(struct vhost_dev *dev, + struct vhost_worker_state *info) +{ + struct vhost_worker *worker; + + worker = vhost_worker_create(dev); + if (!worker) + return -ENOMEM; + + info->worker_id = worker->id; + return 0; +} + +/* Caller must have device mutex */ +static int vhost_free_worker(struct vhost_dev *dev, + struct vhost_worker_state *info) +{ + unsigned long index = info->worker_id; + struct vhost_worker *worker; + + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); + if (!worker || worker->id != info->worker_id) + return -ENODEV; + + mutex_lock(&worker->mutex); + if (worker->attachment_cnt) { + mutex_unlock(&worker->mutex); + return -EBUSY; + } + mutex_unlock(&worker->mutex); + + vhost_worker_destroy(dev, worker); return 0; } +static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp, + struct vhost_virtqueue **vq, u32 *id) +{ + u32 __user *idxp = argp; + u32 idx; + long r; + + r = get_user(idx, idxp); + if (r < 0) + return r; + + if (idx >= dev->nvqs) + return -ENOBUFS; + + idx = array_index_nospec(idx, dev->nvqs); + + *vq = dev->vqs[idx]; + *id = idx; + return 0; +} + +/* Caller must have device mutex */ +long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, + void __user *argp) +{ + struct vhost_vring_worker ring_worker; + struct vhost_worker_state state; + struct vhost_worker *worker; + struct vhost_virtqueue *vq; + long ret; + u32 idx; + + if (!dev->use_worker) + return -EINVAL; + + if (!vhost_dev_has_owner(dev)) + return -EINVAL; + + ret = vhost_dev_check_owner(dev); + if (ret) + return ret; + + switch (ioctl) { + /* dev worker ioctls */ + case VHOST_NEW_WORKER: + ret = vhost_new_worker(dev, &state); + if (!ret && copy_to_user(argp, &state, sizeof(state))) + ret = -EFAULT; + return ret; + case VHOST_FREE_WORKER: + if (copy_from_user(&state, argp, sizeof(state))) + return -EFAULT; + return vhost_free_worker(dev, &state); + /* vring worker ioctls */ + case VHOST_ATTACH_VRING_WORKER: + case VHOST_GET_VRING_WORKER: + break; + default: + return -ENOIOCTLCMD; + } + + ret = vhost_get_vq_from_user(dev, argp, &vq, &idx); + if (ret) + return ret; + + switch (ioctl) { + case VHOST_ATTACH_VRING_WORKER: + if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) { + ret = -EFAULT; + break; + } + + ret = vhost_vq_attach_worker(vq, &ring_worker); + break; + case VHOST_GET_VRING_WORKER: + worker = rcu_dereference_check(vq->worker, + lockdep_is_held(&dev->mutex)); + if (!worker) { + ret = -EINVAL; + break; + } + + ring_worker.index = idx; + ring_worker.worker_id = worker->id; + + if (copy_to_user(argp, &ring_worker, sizeof(ring_worker))) + ret = -EFAULT; + break; + default: + ret 
= -ENOIOCTLCMD; + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(vhost_worker_ioctl); + /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { - int err; + struct vhost_worker *worker; + int err, i; /* Is there an owner already? */ if (vhost_dev_has_owner(dev)) { @@ -572,20 +875,32 @@ long vhost_dev_set_owner(struct vhost_dev *dev) vhost_attach_mm(dev); - if (dev->use_worker) { - err = vhost_worker_create(dev); - if (err) - goto err_worker; - } - err = vhost_dev_alloc_iovecs(dev); if (err) goto err_iovecs; + if (dev->use_worker) { + /* + * This should be done last, because vsock can queue work + * before VHOST_SET_OWNER so it simplifies the failure path + * below since we don't have to worry about vsock queueing + * while we free the worker. + */ + worker = vhost_worker_create(dev); + if (!worker) { + err = -ENOMEM; + goto err_worker; + } + + for (i = 0; i < dev->nvqs; i++) + __vhost_vq_attach_worker(dev->vqs[i], worker); + } + return 0; -err_iovecs: - vhost_worker_free(dev); + err_worker: + vhost_dev_free_iovecs(dev); +err_iovecs: vhost_detach_mm(dev); err_mm: return err; @@ -677,7 +992,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->iotlb = NULL; vhost_clear_msg(dev); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); - vhost_worker_free(dev); + vhost_workers_free(dev); vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -1565,21 +1880,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg struct file *eventfp, *filep = NULL; bool pollstart = false, pollstop = false; struct eventfd_ctx *ctx = NULL; - u32 __user *idxp = argp; struct vhost_virtqueue *vq; struct vhost_vring_state s; struct vhost_vring_file f; u32 idx; long r; - r = get_user(idx, idxp); + r = vhost_get_vq_from_user(d, argp, &vq, &idx); if (r < 0) return r; - if (idx >= d->nvqs) - return -ENOBUFS; - - idx = array_index_nospec(idx, d->nvqs); - vq = d->vqs[idx]; if (ioctl == VHOST_SET_VRING_NUM || ioctl == VHOST_SET_VRING_ADDR) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index fc900be504b3..f60d5f7bef94 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -28,8 +28,12 @@ struct vhost_work { struct vhost_worker { struct vhost_task *vtsk; + /* Used to serialize device wide flushing with worker swapping. */ + struct mutex mutex; struct llist_head work_list; u64 kcov_handle; + u32 id; + int attachment_cnt; }; /* Poll a file (eventfd or socket) */ @@ -41,17 +45,17 @@ struct vhost_poll { struct vhost_work work; __poll_t mask; struct vhost_dev *dev; + struct vhost_virtqueue *vq; }; -void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); -bool vhost_has_work(struct vhost_dev *dev); - void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, - __poll_t mask, struct vhost_dev *dev); + __poll_t mask, struct vhost_dev *dev, + struct vhost_virtqueue *vq); int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); + +void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_dev_flush(struct vhost_dev *dev); struct vhost_log { @@ -74,6 +78,7 @@ struct vhost_vring_call { /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; + struct vhost_worker __rcu *worker; /* The actual ring of buffers. 
*/ struct mutex mutex; @@ -158,7 +163,6 @@ struct vhost_dev { struct vhost_virtqueue **vqs; int nvqs; struct eventfd_ctx *log_ctx; - struct vhost_worker worker; struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; spinlock_t iotlb_lock; @@ -168,6 +172,7 @@ struct vhost_dev { int iov_limit; int weight; int byte_weight; + struct xarray worker_xa; bool use_worker; int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg); @@ -188,16 +193,21 @@ void vhost_dev_cleanup(struct vhost_dev *); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); +long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, + void __user *argp); bool vhost_vq_access_ok(struct vhost_virtqueue *vq); bool vhost_log_access_ok(struct vhost_dev *); void vhost_clear_msg(struct vhost_dev *dev); int vhost_get_vq_desc(struct vhost_virtqueue *, - struct iovec iov[], unsigned int iov_count, + struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); +void vhost_vq_flush(struct vhost_virtqueue *vq); +bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work); +bool vhost_vq_has_work(struct vhost_virtqueue *vq); bool vhost_vq_is_setup(struct vhost_virtqueue *vq); int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6578db78f0ae..817d377a3f36 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -285,7 +285,7 @@ vhost_transport_send_pkt(struct sk_buff *skb) atomic_inc(&vsock->queued_replies); virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); rcu_read_unlock(); return len; @@ -583,7 +583,7 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) /* Some packets may have been queued before the device was started, * let's kick the send worker to send them. */ - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); mutex_unlock(&vsock->dev.mutex); return 0; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 23112d84218f..4b773bd7c58c 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -45,9 +45,10 @@ struct virtio_pci_vq_info { struct virtio_pci_device { struct virtio_device vdev; struct pci_dev *pci_dev; - struct virtio_pci_legacy_device ldev; - struct virtio_pci_modern_device mdev; - + union { + struct virtio_pci_legacy_device ldev; + struct virtio_pci_modern_device mdev; + }; bool is_legacy; /* Where to read and clear interrupt */ diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 869cb46bef96..aad7d9296e77 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -218,21 +218,29 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) int err, common, isr, notify, device; u32 notify_length; u32 notify_offset; + int devid; check_offsets(); - /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. 
*/ - if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f) - return -ENODEV; - - if (pci_dev->device < 0x1040) { - /* Transitional devices: use the PCI subsystem device id as - * virtio device id, same as legacy driver always did. - */ - mdev->id.device = pci_dev->subsystem_device; + if (mdev->device_id_check) { + devid = mdev->device_id_check(pci_dev); + if (devid < 0) + return devid; + mdev->id.device = devid; } else { - /* Modern devices: simply use PCI device id, but start from 0x1040. */ - mdev->id.device = pci_dev->device - 0x1040; + /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f) + return -ENODEV; + + if (pci_dev->device < 0x1040) { + /* Transitional devices: use the PCI subsystem device id as + * virtio device id, same as legacy driver always did. + */ + mdev->id.device = pci_dev->subsystem_device; + } else { + /* Modern devices: simply use PCI device id, but start from 0x1040. */ + mdev->id.device = pci_dev->device - 0x1040; + } } mdev->id.vendor = pci_dev->subsystem_vendor; @@ -260,7 +268,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) return -EINVAL; } - err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pci_dev->dev, + mdev->dma_mask ? : DMA_BIT_MASK(64)); if (err) err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index eb6aee8c06b2..989e2d7184ce 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -385,7 +385,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, err = PTR_ERR(vqs[i]); goto err_setup_vq; } - ops->set_vq_affinity(vdpa, i, &masks[i]); + + if (ops->set_vq_affinity) + ops->set_vq_affinity(vdpa, i, &masks[i]); } cb.callback = virtio_vdpa_config_cb; diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 98a60ce87b92..bcba7fda3cc9 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -222,6 +222,27 @@ enum pds_core_lif_type { PDS_CORE_LIF_TYPE_DEFAULT = 0, }; +#define PDS_CORE_IFNAMSIZ 16 + +/** + * enum pds_core_logical_qtype - Logical Queue Types + * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue + * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue + * @PDS_CORE_QTYPE_RXQ: Receive Queue + * @PDS_CORE_QTYPE_TXQ: Transmit Queue + * @PDS_CORE_QTYPE_EQ: Event Queue + * @PDS_CORE_QTYPE_MAX: Max queue type supported + */ +enum pds_core_logical_qtype { + PDS_CORE_QTYPE_ADMINQ = 0, + PDS_CORE_QTYPE_NOTIFYQ = 1, + PDS_CORE_QTYPE_RXQ = 2, + PDS_CORE_QTYPE_TXQ = 3, + PDS_CORE_QTYPE_EQ = 4, + + PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */ +}; + /** * union pds_core_lif_config - LIF configuration * @state: LIF state (enum pds_core_lif_state) @@ -584,6 +605,219 @@ struct pds_core_q_init_comp { u8 color; }; +/* + * enum pds_vdpa_cmd_opcode - vDPA Device commands + */ +enum pds_vdpa_cmd_opcode { + PDS_VDPA_CMD_INIT = 48, + PDS_VDPA_CMD_IDENT = 49, + PDS_VDPA_CMD_RESET = 51, + PDS_VDPA_CMD_VQ_RESET = 52, + PDS_VDPA_CMD_VQ_INIT = 53, + PDS_VDPA_CMD_STATUS_UPDATE = 54, + PDS_VDPA_CMD_SET_FEATURES = 55, + PDS_VDPA_CMD_SET_ATTR = 56, +}; + +/** + * struct pds_vdpa_cmd - generic command + * @opcode: Opcode + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + */ +struct pds_vdpa_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; +}; + +/** + * struct pds_vdpa_init_cmd - INIT command + * @opcode: Opcode PDS_VDPA_CMD_INIT + * 
@vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + */ +struct pds_vdpa_init_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; +}; + +/** + * struct pds_vdpa_ident - vDPA identification data + * @hw_features: vDPA features supported by device + * @max_vqs: max queues available (2 queues for a single queuepair) + * @max_qlen: log(2) of maximum number of descriptors + * @min_qlen: log(2) of minimum number of descriptors + * + * This struct is used in a DMA block that is set up for the PDS_VDPA_CMD_IDENT + * transaction. Set up the DMA block and send the address in the IDENT cmd + * data, the DSC will write the ident information, then we can remove the DMA + * block after reading the answer. If the completion status is 0, then there + * is valid information, else there was an error and the data should be invalid. + */ +struct pds_vdpa_ident { + __le64 hw_features; + __le16 max_vqs; + __le16 max_qlen; + __le16 min_qlen; +}; + +/** + * struct pds_vdpa_ident_cmd - IDENT command + * @opcode: Opcode PDS_VDPA_CMD_IDENT + * @rsvd: Word boundary padding + * @vf_id: VF id + * @len: length of ident info DMA space + * @ident_pa: address for DMA of ident info (struct pds_vdpa_ident) + * only used for this transaction, then forgotten by DSC + */ +struct pds_vdpa_ident_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + __le32 len; + __le64 ident_pa; +}; + +/** + * struct pds_vdpa_status_cmd - STATUS_UPDATE command + * @opcode: Opcode PDS_VDPA_CMD_STATUS_UPDATE + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @status: new status bits + */ +struct pds_vdpa_status_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + u8 status; +}; + +/** + * enum pds_vdpa_attr - List of VDPA device attributes + * @PDS_VDPA_ATTR_MAC: MAC address + * @PDS_VDPA_ATTR_MAX_VQ_PAIRS: Max virtqueue pairs + */ +enum pds_vdpa_attr { + PDS_VDPA_ATTR_MAC = 1, + PDS_VDPA_ATTR_MAX_VQ_PAIRS = 2, +}; + +/** + * struct pds_vdpa_setattr_cmd - SET_ATTR command + * @opcode: Opcode PDS_VDPA_CMD_SET_ATTR + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @attr: attribute to be changed (enum pds_vdpa_attr) + * @pad: Word boundary padding + * @mac: new mac address to be assigned as vdpa device address + * @max_vq_pairs: new limit of virtqueue pairs + */ +struct pds_vdpa_setattr_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + u8 attr; + u8 pad[3]; + union { + u8 mac[6]; + __le16 max_vq_pairs; + } __packed; +}; + +/** + * struct pds_vdpa_vq_init_cmd - queue init command + * @opcode: Opcode PDS_VDPA_CMD_VQ_INIT + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @qid: Queue id (bit0 clear = rx, bit0 set = tx, qid=N is ctrlq) + * @len: log(2) of max descriptor count + * @desc_addr: DMA address of descriptor area + * @avail_addr: DMA address of available descriptors (aka driver area) + * @used_addr: DMA address of used descriptors (aka device area) + * @intr_index: interrupt index + * @avail_index: initial device position in available ring + * @used_index: initial device position in used ring + */ +struct pds_vdpa_vq_init_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + __le16 qid; + __le16 len; + __le64 desc_addr; + __le64 avail_addr; + __le64 used_addr; + __le16 intr_index; + __le16 avail_index; + __le16 used_index; +}; + +/** + * struct pds_vdpa_vq_init_comp - queue init completion + * @status: Status of the command (enum pds_core_status_code) + * @hw_qtype: HW queue type, used in doorbell selection + * @hw_qindex: HW queue index, used in doorbell selection + * @rsvd: Word boundary padding + * 
+ * @color:	Color bit
+ */
+struct pds_vdpa_vq_init_comp {
+	u8     status;
+	u8     hw_qtype;
+	__le16 hw_qindex;
+	u8     rsvd[11];
+	u8     color;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_cmd - queue reset command
+ * @opcode:	Opcode PDS_VDPA_CMD_VQ_RESET
+ * @vdpa_index:	Index for vdpa subdevice
+ * @vf_id:	VF id
+ * @qid:	Queue id
+ */
+struct pds_vdpa_vq_reset_cmd {
+	u8     opcode;
+	u8     vdpa_index;
+	__le16 vf_id;
+	__le16 qid;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_comp - queue reset completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd0:	Word boundary padding
+ * @avail_index: current device position in available ring
+ * @used_index:	current device position in used ring
+ * @rsvd:	Word boundary padding
+ * @color:	Color bit
+ */
+struct pds_vdpa_vq_reset_comp {
+	u8     status;
+	u8     rsvd0;
+	__le16 avail_index;
+	__le16 used_index;
+	u8     rsvd[9];
+	u8     color;
+};
+
+/**
+ * struct pds_vdpa_set_features_cmd - set hw features
+ * @opcode:	Opcode PDS_VDPA_CMD_SET_FEATURES
+ * @vdpa_index:	Index for vdpa subdevice
+ * @vf_id:	VF id
+ * @rsvd:	Word boundary padding
+ * @features:	Feature bit mask
+ */
+struct pds_vdpa_set_features_cmd {
+	u8     opcode;
+	u8     vdpa_index;
+	__le16 vf_id;
+	__le32 rsvd;
+	__le64 features;
+};
+
 union pds_core_adminq_cmd {
 	u8     opcode;
 	u8     bytes[64];
@@ -600,6 +834,16 @@ union pds_core_adminq_cmd {
 
 	struct pds_core_q_identify_cmd    q_ident;
 	struct pds_core_q_init_cmd        q_init;
+
+	struct pds_vdpa_cmd               vdpa;
+	struct pds_vdpa_init_cmd          vdpa_init;
+	struct pds_vdpa_ident_cmd         vdpa_ident;
+	struct pds_vdpa_status_cmd        vdpa_status;
+	struct pds_vdpa_setattr_cmd       vdpa_setattr;
+	struct pds_vdpa_set_features_cmd  vdpa_set_features;
+	struct pds_vdpa_vq_init_cmd       vdpa_vq_init;
+	struct pds_vdpa_vq_reset_cmd      vdpa_vq_reset;
+
 };
 
 union pds_core_adminq_comp {
@@ -621,6 +865,9 @@ union pds_core_adminq_comp {
 
 	struct pds_core_q_identify_comp   q_ident;
 	struct pds_core_q_init_comp       q_init;
+
+	struct pds_vdpa_vq_init_comp      vdpa_vq_init;
+	struct pds_vdpa_vq_reset_comp     vdpa_vq_reset;
 };
 
 #ifndef __CHECKER__
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 060331486d50..435c8e8161c2 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -39,26 +39,7 @@ enum pds_core_vif_types {
 #define PDS_DEV_TYPE_RDMA_STR	"RDMA"
 #define PDS_DEV_TYPE_LM_STR	"LM"
 
-#define PDS_CORE_IFNAMSIZ	16
-
-/**
- * enum pds_core_logical_qtype - Logical Queue Types
- * @PDS_CORE_QTYPE_ADMINQ:  Administrative Queue
- * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue
- * @PDS_CORE_QTYPE_RXQ:     Receive Queue
- * @PDS_CORE_QTYPE_TXQ:     Transmit Queue
- * @PDS_CORE_QTYPE_EQ:      Event Queue
- * @PDS_CORE_QTYPE_MAX:     Max queue type supported
- */
-enum pds_core_logical_qtype {
-	PDS_CORE_QTYPE_ADMINQ  = 0,
-	PDS_CORE_QTYPE_NOTIFYQ = 1,
-	PDS_CORE_QTYPE_RXQ     = 2,
-	PDS_CORE_QTYPE_TXQ     = 3,
-	PDS_CORE_QTYPE_EQ      = 4,
-
-	PDS_CORE_QTYPE_MAX     = 16  /* don't change - used in struct size */
-};
+#define PDS_VDPA_DEV_NAME	PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
 
 int pdsc_register_notify(struct notifier_block *nb);
 void pdsc_unregister_notify(struct notifier_block *nb);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b93238db94e3..de6041deee37 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -103,6 +103,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
  * @config_enabled: configuration change reporting enabled
  * @config_change_pending: configuration change reported while disabled
  * @config_lock: protects configuration change reporting
+ * @vqs_list_lock: protects @vqs.
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
@@ -117,7 +118,7 @@ struct virtio_device {
 	bool config_enabled;
 	bool config_change_pending;
 	spinlock_t config_lock;
-	spinlock_t vqs_list_lock; /* Protects VQs list access */
+	spinlock_t vqs_list_lock;
 	struct device dev;
 	struct virtio_device_id id;
 	const struct virtio_config_ops *config;
@@ -160,6 +161,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @validate: the function to call to validate features and config space.
+ *	Returns 0 or -errno.
  * @probe: the function to call when a device is found. Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *	for virtio-scsi to invoke a scan.
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index c4eeb79b0139..067ac1d789bc 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -38,6 +38,12 @@ struct virtio_pci_modern_device {
 	int modern_bars;
 
 	struct virtio_device_id id;
+
+	/* optional check for vendor virtio device, returns dev_id or -ERRNO */
+	int (*device_id_check)(struct pci_dev *pdev);
+
+	/* optional mask for devices with limited DMA space */
+	u64 dma_mask;
 };
 
 /*
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 92e1b700b51c..f5c48b61ab62 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -45,6 +45,25 @@
 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
 /* Specify an eventfd file descriptor to signal on log write. */
 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers it has created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
 
 /* Ring setup. */
 /* Set number of descriptors in ring. This parameter can not
@@ -70,6 +89,18 @@
 #define VHOST_VRING_BIG_ENDIAN 1
 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
 #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15,		\
+				       struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16,		\
+				     struct vhost_vring_worker)
 
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index c5690a8992d8..d3aad12ad1fa 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -47,6 +47,22 @@ struct vhost_vring_addr {
 	__u64 log_guest_addr;
 };
 
+struct vhost_worker_state {
+	/*
+	 * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
+	 * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
+	 * to free.
+	 */
+	unsigned int worker_id;
+};
+
+struct vhost_vring_worker {
+	/* vring index */
+	unsigned int index;
+	/* The id of the vhost_worker returned from VHOST_NEW_WORKER */
+	unsigned int worker_id;
+};
+
 /* no alignment requirement */
 struct vhost_iotlb_msg {
 	__u64 iova;
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 7b7139d97d74..d128925980e0 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -4,7 +4,18 @@ test: virtio_test vringh_test
 virtio_test: virtio_ring.o virtio_test.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
 
-CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register
+try-run = $(shell set -e;		\
+	if ($(1)) >/dev/null 2>&1;	\
+	then echo "$(2)";		\
+	else echo "$(3)";		\
+	fi)
+
+__cc-option = $(call try-run,\
+	$(1) -Werror $(2) -c -x c /dev/null -o /dev/null,$(2),)
+cc-option = $(call __cc-option, $(CC),$(1))
+
+CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h $(call cc-option,-mfunction-return=thunk) $(call cc-option,-fcf-protection=none) $(call cc-option,-mindirect-branch-register)
+
 CFLAGS += -pthread
 LDFLAGS += -pthread
 vpath %.c ../../drivers/virtio ../../drivers/vhost
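
The interfaces added above lend themselves to short usage sketches. The three examples that follow are editorial illustrations, not part of the patch series; any name marked as invented is an assumption made only for the sake of the example.

First, building a PDS_VDPA_CMD_VQ_INIT request in the new adminq command union from pds_adminq.h. The submit helper prototype is invented for illustration; the real driver submits through the pds_core adminq interface.

/*
 * Sketch only: fill a vDPA queue-init adminq command.
 * pds_vdpa_example_submit() is a hypothetical stand-in for the real
 * pds_core adminq submission path.
 */
#include <linux/types.h>
#include <linux/log2.h>
#include <linux/pds/pds_adminq.h>
#include <asm/byteorder.h>

int pds_vdpa_example_submit(union pds_core_adminq_cmd *cmd,
			    union pds_core_adminq_comp *comp);

static int example_vq_init(u8 vdpa_index, u16 vf_id, u16 qid, u16 num_descs,
			   u64 desc_pa, u64 avail_pa, u64 used_pa,
			   u16 intr_index)
{
	union pds_core_adminq_comp comp = {};
	union pds_core_adminq_cmd cmd = {
		.vdpa_vq_init = {
			.opcode     = PDS_VDPA_CMD_VQ_INIT,
			.vdpa_index = vdpa_index,
			.vf_id      = cpu_to_le16(vf_id),
			.qid        = cpu_to_le16(qid),
			/* @len carries log(2) of the descriptor count */
			.len        = cpu_to_le16(ilog2(num_descs)),
			.desc_addr  = cpu_to_le64(desc_pa),
			.avail_addr = cpu_to_le64(avail_pa),
			.used_addr  = cpu_to_le64(used_pa),
			.intr_index = cpu_to_le16(intr_index),
		},
	};

	return pds_vdpa_example_submit(&cmd, &comp);
}

Second, the new device_id_check and dma_mask members of struct virtio_pci_modern_device let a vendor driver steer vp_modern_probe() past the standard 0x1000-0x107f id window and narrow the DMA mask. The PCI ids and the 52-bit mask below are invented values; pds_vdpa follows the same pattern with its own ids.

/*
 * Sketch only: a vendor VF driver handing its own id check and DMA
 * limit to the generic modern-device probe.  EXAMPLE_* values are
 * invented for illustration.
 */
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_pci_modern.h>

#define EXAMPLE_PCI_VENDOR_ID	0x1234
#define EXAMPLE_PCI_DEVICE_ID	0x5678

static int example_device_id_check(struct pci_dev *pdev)
{
	if (pdev->vendor != EXAMPLE_PCI_VENDOR_ID ||
	    pdev->device != EXAMPLE_PCI_DEVICE_ID)
		return -ENODEV;

	/* Report which virtio device type this hardware implements. */
	return VIRTIO_ID_NET;
}

static int example_setup_vf(struct pci_dev *pdev)
{
	struct virtio_pci_modern_device *mdev;
	int err;

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return -ENOMEM;

	mdev->pci_dev = pdev;
	mdev->device_id_check = example_device_id_check; /* skip id window check */
	mdev->dma_mask = DMA_BIT_MASK(52);		 /* device-specific limit */

	err = vp_modern_probe(mdev);
	if (err)
		kfree(mdev);
	return err;
}

Third, the vhost worker ioctls as seen from userspace: create a second worker, then rebind a vring to it. The vhost-net device node and the use of vring index 0 are assumptions for illustration. An unattached worker could be released with VHOST_FREE_WORKER, or simply left for the kernel to reap when the fd is closed, per the comment above.

/* Sketch only: userspace use of the new vhost worker ioctls. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vhost.h>

int main(void)
{
	struct vhost_worker_state state = { 0 };
	struct vhost_vring_worker vring_worker = { 0 };
	int ret = 1;
	int fd = open("/dev/vhost-net", O_RDWR);	/* assumed backend */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Worker ioctls are restricted to the device owner. */
	if (ioctl(fd, VHOST_SET_OWNER, NULL) < 0) {
		perror("VHOST_SET_OWNER");
		goto out;
	}

	/* The kernel returns the new worker's id in state.worker_id. */
	if (ioctl(fd, VHOST_NEW_WORKER, &state) < 0) {
		perror("VHOST_NEW_WORKER");
		goto out;
	}

	/* Move vring 0 off the default worker onto the new one. */
	vring_worker.index = 0;
	vring_worker.worker_id = state.worker_id;
	if (ioctl(fd, VHOST_ATTACH_VRING_WORKER, &vring_worker) < 0) {
		perror("VHOST_ATTACH_VRING_WORKER");
		goto out;
	}
	ret = 0;

out:
	close(fd);
	return ret;
}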