summaryrefslogtreecommitdiffstats
path: root/drivers/vdpa
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@redhat.com>2020-03-31 21:15:14 +0200
committerMichael S. Tsirkin <mst@redhat.com>2020-04-02 16:41:40 +0200
commitc9b9f5f8c0f3cdb893cb86c168cdaa3aa5ed7278 (patch)
treeed96a8fe87994f9f9d9596c2f79ad3289abc969b /drivers/vdpa
parentvirtio: Intel IFC VF driver for VDPA (diff)
downloadlinux-c9b9f5f8c0f3cdb893cb86c168cdaa3aa5ed7278.tar.xz
linux-c9b9f5f8c0f3cdb893cb86c168cdaa3aa5ed7278.zip
vdpa: move to drivers/vdpa
We have both vhost and virtio drivers that depend on vdpa. It's easier to locate it at a top level directory otherwise we run into issues e.g. if vhost is built-in but virtio is modular. Let's just move it up a level. Reported-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Diffstat (limited to 'drivers/vdpa')
-rw-r--r--drivers/vdpa/Kconfig37
-rw-r--r--drivers/vdpa/Makefile4
-rw-r--r--drivers/vdpa/ifcvf/Makefile3
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.c389
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.h118
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c435
-rw-r--r--drivers/vdpa/vdpa.c180
-rw-r--r--drivers/vdpa/vdpa_sim/Makefile2
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c629
9 files changed, 1797 insertions, 0 deletions
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
new file mode 100644
index 000000000000..7db1460104b7
--- /dev/null
+++ b/drivers/vdpa/Kconfig
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config VDPA
+ tristate
+ help
+ Enable this module to support vDPA device that uses a
+ datapath which complies with virtio specifications with
+ vendor specific control path.
+
+menuconfig VDPA_MENU
+ bool "VDPA drivers"
+ default n
+
+if VDPA_MENU
+
+config VDPA_SIM
+ tristate "vDPA device simulator"
+ depends on RUNTIME_TESTING_MENU
+ select VDPA
+ select VHOST_RING
+ default n
+ help
+ vDPA networking device simulator which loop TX traffic back
+ to RX. This device is used for testing, prototyping and
+ development of vDPA.
+
+config IFCVF
+ tristate "Intel IFC VF VDPA driver"
+ depends on PCI_MSI
+ select VDPA
+ default n
+ help
+ This kernel module can drive Intel IFC VF NIC to offload
+ virtio dataplane traffic to hardware.
+ To compile this driver as a module, choose M here: the module will
+ be called ifcvf.
+
+endif # VDPA_MENU
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
new file mode 100644
index 000000000000..8bbb686ca7a2
--- /dev/null
+++ b/drivers/vdpa/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VDPA) += vdpa.o
+obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
+obj-$(CONFIG_IFCVF) += ifcvf/
diff --git a/drivers/vdpa/ifcvf/Makefile b/drivers/vdpa/ifcvf/Makefile
new file mode 100644
index 000000000000..d709915995ab
--- /dev/null
+++ b/drivers/vdpa/ifcvf/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_IFCVF) += ifcvf.o
+ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
new file mode 100644
index 000000000000..b61b06ea26d3
--- /dev/null
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel IFC VF NIC driver for virtio dataplane offloading
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ *
+ * Author: Zhu Lingshan <lingshan.zhu@intel.com>
+ *
+ */
+
+#include "ifcvf_base.h"
+
+static inline u8 ifc_ioread8(u8 __iomem *addr)
+{
+ return ioread8(addr);
+}
+static inline u16 ifc_ioread16 (__le16 __iomem *addr)
+{
+ return ioread16(addr);
+}
+
+static inline u32 ifc_ioread32(__le32 __iomem *addr)
+{
+ return ioread32(addr);
+}
+
+static inline void ifc_iowrite8(u8 value, u8 __iomem *addr)
+{
+ iowrite8(value, addr);
+}
+
+static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr)
+{
+ iowrite16(value, addr);
+}
+
+static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr)
+{
+ iowrite32(value, addr);
+}
+
+static void ifc_iowrite64_twopart(u64 val,
+ __le32 __iomem *lo, __le32 __iomem *hi)
+{
+ ifc_iowrite32((u32)val, lo);
+ ifc_iowrite32(val >> 32, hi);
+}
+
+struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
+{
+ return container_of(hw, struct ifcvf_adapter, vf);
+}
+
+static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
+ struct virtio_pci_cap *cap)
+{
+ struct ifcvf_adapter *ifcvf;
+ struct pci_dev *pdev;
+ u32 length, offset;
+ u8 bar;
+
+ length = le32_to_cpu(cap->length);
+ offset = le32_to_cpu(cap->offset);
+ bar = cap->bar;
+
+ ifcvf= vf_to_adapter(hw);
+ pdev = ifcvf->pdev;
+
+ if (bar >= IFCVF_PCI_MAX_RESOURCE) {
+ IFCVF_DBG(pdev,
+ "Invalid bar number %u to get capabilities\n", bar);
+ return NULL;
+ }
+
+ if (offset + length > pci_resource_len(pdev, bar)) {
+ IFCVF_DBG(pdev,
+ "offset(%u) + len(%u) overflows bar%u's capability\n",
+ offset, length, bar);
+ return NULL;
+ }
+
+ return hw->base[bar] + offset;
+}
+
+static int ifcvf_read_config_range(struct pci_dev *dev,
+ uint32_t *val, int size, int where)
+{
+ int ret, i;
+
+ for (i = 0; i < size; i += 4) {
+ ret = pci_read_config_dword(dev, where + i, val + i / 4);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
+{
+ struct virtio_pci_cap cap;
+ u16 notify_off;
+ int ret;
+ u8 pos;
+ u32 i;
+
+ ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos);
+ if (ret < 0) {
+ IFCVF_ERR(pdev, "Failed to read PCI capability list\n");
+ return -EIO;
+ }
+
+ while (pos) {
+ ret = ifcvf_read_config_range(pdev, (u32 *)&cap,
+ sizeof(cap), pos);
+ if (ret < 0) {
+ IFCVF_ERR(pdev,
+ "Failed to get PCI capability at %x\n", pos);
+ break;
+ }
+
+ if (cap.cap_vndr != PCI_CAP_ID_VNDR)
+ goto next;
+
+ switch (cap.cfg_type) {
+ case VIRTIO_PCI_CAP_COMMON_CFG:
+ hw->common_cfg = get_cap_addr(hw, &cap);
+ IFCVF_DBG(pdev, "hw->common_cfg = %p\n",
+ hw->common_cfg);
+ break;
+ case VIRTIO_PCI_CAP_NOTIFY_CFG:
+ pci_read_config_dword(pdev, pos + sizeof(cap),
+ &hw->notify_off_multiplier);
+ hw->notify_bar = cap.bar;
+ hw->notify_base = get_cap_addr(hw, &cap);
+ IFCVF_DBG(pdev, "hw->notify_base = %p\n",
+ hw->notify_base);
+ break;
+ case VIRTIO_PCI_CAP_ISR_CFG:
+ hw->isr = get_cap_addr(hw, &cap);
+ IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr);
+ break;
+ case VIRTIO_PCI_CAP_DEVICE_CFG:
+ hw->net_cfg = get_cap_addr(hw, &cap);
+ IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg);
+ break;
+ }
+
+next:
+ pos = cap.cap_next;
+ }
+
+ if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+ hw->isr == NULL || hw->net_cfg == NULL) {
+ IFCVF_ERR(pdev, "Incomplete PCI capabilities\n");
+ return -EIO;
+ }
+
+ for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ ifc_iowrite16(i, &hw->common_cfg->queue_select);
+ notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
+ hw->vring[i].notify_addr = hw->notify_base +
+ notify_off * hw->notify_off_multiplier;
+ }
+
+ hw->lm_cfg = hw->base[IFCVF_LM_BAR];
+
+ IFCVF_DBG(pdev,
+ "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n",
+ hw->common_cfg, hw->notify_base, hw->isr,
+ hw->net_cfg, hw->notify_off_multiplier);
+
+ return 0;
+}
+
+u8 ifcvf_get_status(struct ifcvf_hw *hw)
+{
+ return ifc_ioread8(&hw->common_cfg->device_status);
+}
+
+void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
+{
+ ifc_iowrite8(status, &hw->common_cfg->device_status);
+}
+
+void ifcvf_reset(struct ifcvf_hw *hw)
+{
+ ifcvf_set_status(hw, 0);
+ /* flush set_status, make sure VF is stopped, reset */
+ ifcvf_get_status(hw);
+}
+
+static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
+{
+ if (status != 0)
+ status |= ifcvf_get_status(hw);
+
+ ifcvf_set_status(hw, status);
+ ifcvf_get_status(hw);
+}
+
+u64 ifcvf_get_features(struct ifcvf_hw *hw)
+{
+ struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+ u32 features_lo, features_hi;
+
+ ifc_iowrite32(0, &cfg->device_feature_select);
+ features_lo = ifc_ioread32(&cfg->device_feature);
+
+ ifc_iowrite32(1, &cfg->device_feature_select);
+ features_hi = ifc_ioread32(&cfg->device_feature);
+
+ return ((u64)features_hi << 32) | features_lo;
+}
+
+void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
+ void *dst, int length)
+{
+ u8 old_gen, new_gen, *p;
+ int i;
+
+ WARN_ON(offset + length > sizeof(struct virtio_net_config));
+ do {
+ old_gen = ifc_ioread8(&hw->common_cfg->config_generation);
+ p = dst;
+ for (i = 0; i < length; i++)
+ *p++ = ifc_ioread8(hw->net_cfg + offset + i);
+
+ new_gen = ifc_ioread8(&hw->common_cfg->config_generation);
+ } while (old_gen != new_gen);
+}
+
+void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
+ const void *src, int length)
+{
+ const u8 *p;
+ int i;
+
+ p = src;
+ WARN_ON(offset + length > sizeof(struct virtio_net_config));
+ for (i = 0; i < length; i++)
+ ifc_iowrite8(*p++, hw->net_cfg + offset + i);
+}
+
+static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
+{
+ struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+
+ ifc_iowrite32(0, &cfg->guest_feature_select);
+ ifc_iowrite32((u32)features, &cfg->guest_feature);
+
+ ifc_iowrite32(1, &cfg->guest_feature_select);
+ ifc_iowrite32(features >> 32, &cfg->guest_feature);
+}
+
+static int ifcvf_config_features(struct ifcvf_hw *hw)
+{
+ struct ifcvf_adapter *ifcvf;
+
+ ifcvf = vf_to_adapter(hw);
+ ifcvf_set_features(hw, hw->req_features);
+ ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
+
+ if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
+ IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
+{
+ struct ifcvf_lm_cfg __iomem *ifcvf_lm;
+ void __iomem *avail_idx_addr;
+ u16 last_avail_idx;
+ u32 q_pair_id;
+
+ ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
+ q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+ avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
+ last_avail_idx = ifc_ioread16(avail_idx_addr);
+
+ return last_avail_idx;
+}
+
+int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num)
+{
+ struct ifcvf_lm_cfg __iomem *ifcvf_lm;
+ void __iomem *avail_idx_addr;
+ u32 q_pair_id;
+
+ ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
+ q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+ avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
+ hw->vring[qid].last_avail_idx = num;
+ ifc_iowrite16(num, avail_idx_addr);
+
+ return 0;
+}
+
+static int ifcvf_hw_enable(struct ifcvf_hw *hw)
+{
+ struct ifcvf_lm_cfg __iomem *ifcvf_lm;
+ struct virtio_pci_common_cfg __iomem *cfg;
+ struct ifcvf_adapter *ifcvf;
+ u32 i;
+
+ ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
+ ifcvf = vf_to_adapter(hw);
+ cfg = hw->common_cfg;
+ ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config);
+
+ if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
+ IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < hw->nr_vring; i++) {
+ if (!hw->vring[i].ready)
+ break;
+
+ ifc_iowrite16(i, &cfg->queue_select);
+ ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+ &cfg->queue_desc_hi);
+ ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+ &cfg->queue_avail_hi);
+ ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+ &cfg->queue_used_hi);
+ ifc_iowrite16(hw->vring[i].size, &cfg->queue_size);
+ ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector);
+
+ if (ifc_ioread16(&cfg->queue_msix_vector) ==
+ VIRTIO_MSI_NO_VECTOR) {
+ IFCVF_ERR(ifcvf->pdev,
+ "No msix vector for queue %u\n", i);
+ return -EINVAL;
+ }
+
+ ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx);
+ ifc_iowrite16(1, &cfg->queue_enable);
+ }
+
+ return 0;
+}
+
+static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+{
+ struct virtio_pci_common_cfg __iomem *cfg;
+ u32 i;
+
+ cfg = hw->common_cfg;
+ ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
+
+ for (i = 0; i < hw->nr_vring; i++) {
+ ifc_iowrite16(i, &cfg->queue_select);
+ ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
+ }
+
+ ifc_ioread16(&cfg->queue_msix_vector);
+}
+
+int ifcvf_start_hw(struct ifcvf_hw *hw)
+{
+ ifcvf_reset(hw);
+ ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+ ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER);
+
+ if (ifcvf_config_features(hw) < 0)
+ return -EINVAL;
+
+ if (ifcvf_hw_enable(hw) < 0)
+ return -EINVAL;
+
+ ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK);
+
+ return 0;
+}
+
+void ifcvf_stop_hw(struct ifcvf_hw *hw)
+{
+ ifcvf_hw_disable(hw);
+ ifcvf_reset(hw);
+}
+
+void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
+{
+ ifc_iowrite16(qid, hw->vring[qid].notify_addr);
+}
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
new file mode 100644
index 000000000000..e80307092351
--- /dev/null
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Intel IFC VF NIC driver for virtio dataplane offloading
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ *
+ * Author: Zhu Lingshan <lingshan.zhu@intel.com>
+ *
+ */
+
+#ifndef _IFCVF_H_
+#define _IFCVF_H_
+
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/vdpa.h>
+#include <uapi/linux/virtio_net.h>
+#include <uapi/linux/virtio_config.h>
+#include <uapi/linux/virtio_pci.h>
+
+#define IFCVF_VENDOR_ID 0x1AF4
+#define IFCVF_DEVICE_ID 0x1041
+#define IFCVF_SUBSYS_VENDOR_ID 0x8086
+#define IFCVF_SUBSYS_DEVICE_ID 0x001A
+
+#define IFCVF_SUPPORTED_FEATURES \
+ ((1ULL << VIRTIO_NET_F_MAC) | \
+ (1ULL << VIRTIO_F_ANY_LAYOUT) | \
+ (1ULL << VIRTIO_F_VERSION_1) | \
+ (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
+ (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \
+ (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+
+/* Only one queue pair for now. */
+#define IFCVF_MAX_QUEUE_PAIRS 1
+
+#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE
+#define IFCVF_QUEUE_MAX 32768
+#define IFCVF_MSI_CONFIG_OFF 0
+#define IFCVF_MSI_QUEUE_OFF 1
+#define IFCVF_PCI_MAX_RESOURCE 6
+
+#define IFCVF_LM_CFG_SIZE 0x40
+#define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_LM_BAR 4
+
+#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__)
+#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__)
+#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__)
+
+#define ifcvf_private_to_vf(adapter) \
+ (&((struct ifcvf_adapter *)adapter)->vf)
+
+#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
+
+struct vring_info {
+ u64 desc;
+ u64 avail;
+ u64 used;
+ u16 size;
+ u16 last_avail_idx;
+ bool ready;
+ void __iomem *notify_addr;
+ u32 irq;
+ struct vdpa_callback cb;
+ char msix_name[256];
+};
+
+struct ifcvf_hw {
+ u8 __iomem *isr;
+ /* Live migration */
+ u8 __iomem *lm_cfg;
+ u16 nr_vring;
+ /* Notification bar number */
+ u8 notify_bar;
+ /* Notificaiton bar address */
+ void __iomem *notify_base;
+ u32 notify_off_multiplier;
+ u64 req_features;
+ struct virtio_pci_common_cfg __iomem *common_cfg;
+ void __iomem *net_cfg;
+ struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
+ void __iomem * const *base;
+};
+
+struct ifcvf_adapter {
+ struct vdpa_device vdpa;
+ struct pci_dev *pdev;
+ struct ifcvf_hw vf;
+};
+
+struct ifcvf_vring_lm_cfg {
+ u32 idx_addr[2];
+ u8 reserved[IFCVF_LM_CFG_SIZE - 8];
+};
+
+struct ifcvf_lm_cfg {
+ u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
+ struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
+};
+
+int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
+int ifcvf_start_hw(struct ifcvf_hw *hw);
+void ifcvf_stop_hw(struct ifcvf_hw *hw);
+void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
+void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
+ void *dst, int length);
+void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
+ const void *src, int length);
+u8 ifcvf_get_status(struct ifcvf_hw *hw);
+void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
+void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
+void ifcvf_reset(struct ifcvf_hw *hw);
+u64 ifcvf_get_features(struct ifcvf_hw *hw);
+u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
+int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num);
+struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
+#endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
new file mode 100644
index 000000000000..8d54dc5b08d2
--- /dev/null
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel IFC VF NIC driver for virtio dataplane offloading
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ *
+ * Author: Zhu Lingshan <lingshan.zhu@intel.com>
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+#include "ifcvf_base.h"
+
+#define VERSION_STRING "0.1"
+#define DRIVER_AUTHOR "Intel Corporation"
+#define IFCVF_DRIVER_NAME "ifcvf"
+
+static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
+{
+ struct vring_info *vring = arg;
+
+ if (vring->cb.callback)
+ return vring->cb.callback(vring->cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static int ifcvf_start_datapath(void *private)
+{
+ struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
+ struct ifcvf_adapter *ifcvf;
+ u8 status;
+ int ret;
+
+ ifcvf = vf_to_adapter(vf);
+ vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2;
+ ret = ifcvf_start_hw(vf);
+ if (ret < 0) {
+ status = ifcvf_get_status(vf);
+ status |= VIRTIO_CONFIG_S_FAILED;
+ ifcvf_set_status(vf, status);
+ }
+
+ return ret;
+}
+
+static int ifcvf_stop_datapath(void *private)
+{
+ struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
+ int i;
+
+ for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
+ vf->vring[i].cb.callback = NULL;
+
+ ifcvf_stop_hw(vf);
+
+ return 0;
+}
+
+static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
+{
+ struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
+ int i;
+
+ for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ vf->vring[i].last_avail_idx = 0;
+ vf->vring[i].desc = 0;
+ vf->vring[i].avail = 0;
+ vf->vring[i].used = 0;
+ vf->vring[i].ready = 0;
+ vf->vring[i].cb.callback = NULL;
+ vf->vring[i].cb.private = NULL;
+ }
+
+ ifcvf_reset(vf);
+}
+
+static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev)
+{
+ return container_of(vdpa_dev, struct ifcvf_adapter, vdpa);
+}
+
+static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+
+ return &adapter->vf;
+}
+
+static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ u64 features;
+
+ features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
+
+ return features;
+}
+
+static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ vf->req_features = features;
+
+ return 0;
+}
+
+static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return ifcvf_get_status(vf);
+}
+
+static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
+{
+ struct ifcvf_adapter *adapter;
+ struct ifcvf_hw *vf;
+
+ vf = vdpa_to_vf(vdpa_dev);
+ adapter = dev_get_drvdata(vdpa_dev->dev.parent);
+
+ if (status == 0) {
+ ifcvf_stop_datapath(adapter);
+ ifcvf_reset_vring(adapter);
+ return;
+ }
+
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ if (ifcvf_start_datapath(adapter) < 0)
+ IFCVF_ERR(adapter->pdev,
+ "Failed to set ifcvf vdpa status %u\n",
+ status);
+ }
+
+ ifcvf_set_status(vf, status);
+}
+
+static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
+{
+ return IFCVF_QUEUE_MAX;
+}
+
+static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return ifcvf_get_vq_state(vf, qid);
+}
+
+static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
+ u64 num)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return ifcvf_set_vq_state(vf, qid, num);
+}
+
+static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
+ struct vdpa_callback *cb)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ vf->vring[qid].cb = *cb;
+}
+
+static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev,
+ u16 qid, bool ready)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ vf->vring[qid].ready = ready;
+}
+
+static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return vf->vring[qid].ready;
+}
+
+static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid,
+ u32 num)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ vf->vring[qid].size = num;
+}
+
+static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
+ u64 desc_area, u64 driver_area,
+ u64 device_area)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ vf->vring[qid].desc = desc_area;
+ vf->vring[qid].avail = driver_area;
+ vf->vring[qid].used = device_area;
+
+ return 0;
+}
+
+static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ ifcvf_notify_queue(vf, qid);
+}
+
+static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return ioread8(&vf->common_cfg->config_generation);
+}
+
+static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
+{
+ return VIRTIO_ID_NET;
+}
+
+static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
+{
+ return IFCVF_SUBSYS_VENDOR_ID;
+}
+
+static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
+{
+ return IFCVF_QUEUE_ALIGNMENT;
+}
+
+static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
+ unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ WARN_ON(offset + len > sizeof(struct virtio_net_config));
+ ifcvf_read_net_config(vf, offset, buf, len);
+}
+
+static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
+ unsigned int offset, const void *buf,
+ unsigned int len)
+{
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ WARN_ON(offset + len > sizeof(struct virtio_net_config));
+ ifcvf_write_net_config(vf, offset, buf, len);
+}
+
+static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
+ struct vdpa_callback *cb)
+{
+ /* We don't support config interrupt */
+}
+
+/*
+ * IFCVF currently does't have on-chip IOMMU, so not
+ * implemented set_map()/dma_map()/dma_unmap()
+ */
+static const struct vdpa_config_ops ifc_vdpa_ops = {
+ .get_features = ifcvf_vdpa_get_features,
+ .set_features = ifcvf_vdpa_set_features,
+ .get_status = ifcvf_vdpa_get_status,
+ .set_status = ifcvf_vdpa_set_status,
+ .get_vq_num_max = ifcvf_vdpa_get_vq_num_max,
+ .get_vq_state = ifcvf_vdpa_get_vq_state,
+ .set_vq_state = ifcvf_vdpa_set_vq_state,
+ .set_vq_cb = ifcvf_vdpa_set_vq_cb,
+ .set_vq_ready = ifcvf_vdpa_set_vq_ready,
+ .get_vq_ready = ifcvf_vdpa_get_vq_ready,
+ .set_vq_num = ifcvf_vdpa_set_vq_num,
+ .set_vq_address = ifcvf_vdpa_set_vq_address,
+ .kick_vq = ifcvf_vdpa_kick_vq,
+ .get_generation = ifcvf_vdpa_get_generation,
+ .get_device_id = ifcvf_vdpa_get_device_id,
+ .get_vendor_id = ifcvf_vdpa_get_vendor_id,
+ .get_vq_align = ifcvf_vdpa_get_vq_align,
+ .get_config = ifcvf_vdpa_get_config,
+ .set_config = ifcvf_vdpa_set_config,
+ .set_config_cb = ifcvf_vdpa_set_config_cb,
+};
+
+static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct ifcvf_hw *vf = &adapter->vf;
+ int vector, i, ret, irq;
+
+
+ for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
+ pci_name(pdev), i);
+ vector = i + IFCVF_MSI_QUEUE_OFF;
+ irq = pci_irq_vector(pdev, vector);
+ ret = devm_request_irq(&pdev->dev, irq,
+ ifcvf_intr_handler, 0,
+ vf->vring[i].msix_name,
+ &vf->vring[i]);
+ if (ret) {
+ IFCVF_ERR(pdev,
+ "Failed to request irq for vq %d\n", i);
+ return ret;
+ }
+ vf->vring[i].irq = irq;
+ }
+
+ return 0;
+}
+
+static void ifcvf_free_irq_vectors(void *data)
+{
+ pci_free_irq_vectors(data);
+}
+
+static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct ifcvf_adapter *adapter;
+ struct ifcvf_hw *vf;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to enable device\n");
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
+ IFCVF_DRIVER_NAME);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to request MMIO region\n");
+ return ret;
+ }
+
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret) {
+ IFCVF_ERR(pdev, "No usable DMA confiugration\n");
+ return ret;
+ }
+
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret) {
+ IFCVF_ERR(pdev,
+ "No usable coherent DMA confiugration\n");
+ return ret;
+ }
+
+ ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
+ IFCVF_MAX_INTR, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ IFCVF_ERR(pdev, "Failed to alloc irq vectors\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
+ if (ret) {
+ IFCVF_ERR(pdev,
+ "Failed for adding devres for freeing irq vectors\n");
+ return ret;
+ }
+
+ adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
+ dev, &ifc_vdpa_ops);
+ if (adapter == NULL) {
+ IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
+ return -ENOMEM;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, adapter);
+
+ vf = &adapter->vf;
+ vf->base = pcim_iomap_table(pdev);
+
+ adapter->pdev = pdev;
+ adapter->vdpa.dma_dev = &pdev->dev;
+
+ ret = ifcvf_request_irq(adapter);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to request MSI-X irq\n");
+ goto err;
+ }
+
+ ret = ifcvf_init_hw(vf, pdev);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
+ goto err;
+ }
+
+ ret = vdpa_register_device(&adapter->vdpa);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ put_device(&adapter->vdpa.dev);
+ return ret;
+}
+
+static void ifcvf_remove(struct pci_dev *pdev)
+{
+ struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+
+ vdpa_unregister_device(&adapter->vdpa);
+}
+
+static struct pci_device_id ifcvf_pci_ids[] = {
+ { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
+ IFCVF_DEVICE_ID,
+ IFCVF_SUBSYS_VENDOR_ID,
+ IFCVF_SUBSYS_DEVICE_ID) },
+ { 0 },
+};
+MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
+
+static struct pci_driver ifcvf_driver = {
+ .name = IFCVF_DRIVER_NAME,
+ .id_table = ifcvf_pci_ids,
+ .probe = ifcvf_probe,
+ .remove = ifcvf_remove,
+};
+
+module_pci_driver(ifcvf_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(VERSION_STRING);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
new file mode 100644
index 000000000000..e9ed6a2b635b
--- /dev/null
+++ b/drivers/vdpa/vdpa.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bus.
+ *
+ * Copyright (c) 2020, Red Hat. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/vdpa.h>
+
+static DEFINE_IDA(vdpa_index_ida);
+
+static int vdpa_dev_probe(struct device *d)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(d);
+ struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
+ int ret = 0;
+
+ if (drv && drv->probe)
+ ret = drv->probe(vdev);
+
+ return ret;
+}
+
+static int vdpa_dev_remove(struct device *d)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(d);
+ struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
+
+ if (drv && drv->remove)
+ drv->remove(vdev);
+
+ return 0;
+}
+
+static struct bus_type vdpa_bus = {
+ .name = "vdpa",
+ .probe = vdpa_dev_probe,
+ .remove = vdpa_dev_remove,
+};
+
+static void vdpa_release_dev(struct device *d)
+{
+ struct vdpa_device *vdev = dev_to_vdpa(d);
+ const struct vdpa_config_ops *ops = vdev->config;
+
+ if (ops->free)
+ ops->free(vdev);
+
+ ida_simple_remove(&vdpa_index_ida, vdev->index);
+ kfree(vdev);
+}
+
+/**
+ * __vdpa_alloc_device - allocate and initilaize a vDPA device
+ * This allows driver to some prepartion after device is
+ * initialized but before registered.
+ * @parent: the parent device
+ * @config: the bus operations that is supported by this device
+ * @size: size of the parent structure that contains private data
+ *
+ * Drvier should use vdap_alloc_device() wrapper macro instead of
+ * using this directly.
+ *
+ * Returns an error when parent/config/dma_dev is not set or fail to get
+ * ida.
+ */
+struct vdpa_device *__vdpa_alloc_device(struct device *parent,
+ const struct vdpa_config_ops *config,
+ size_t size)
+{
+ struct vdpa_device *vdev;
+ int err = -EINVAL;
+
+ if (!config)
+ goto err;
+
+ if (!!config->dma_map != !!config->dma_unmap)
+ goto err;
+
+ err = -ENOMEM;
+ vdev = kzalloc(size, GFP_KERNEL);
+ if (!vdev)
+ goto err;
+
+ err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
+ if (err < 0)
+ goto err_ida;
+
+ vdev->dev.bus = &vdpa_bus;
+ vdev->dev.parent = parent;
+ vdev->dev.release = vdpa_release_dev;
+ vdev->index = err;
+ vdev->config = config;
+
+ err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
+ if (err)
+ goto err_name;
+
+ device_initialize(&vdev->dev);
+
+ return vdev;
+
+err_name:
+ ida_simple_remove(&vdpa_index_ida, vdev->index);
+err_ida:
+ kfree(vdev);
+err:
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
+
+/**
+ * vdpa_register_device - register a vDPA device
+ * Callers must have a succeed call of vdpa_init_device() before.
+ * @vdev: the vdpa device to be registered to vDPA bus
+ *
+ * Returns an error when fail to add to vDPA bus
+ */
+int vdpa_register_device(struct vdpa_device *vdev)
+{
+ return device_add(&vdev->dev);
+}
+EXPORT_SYMBOL_GPL(vdpa_register_device);
+
+/**
+ * vdpa_unregister_device - unregister a vDPA device
+ * @vdev: the vdpa device to be unregisted from vDPA bus
+ */
+void vdpa_unregister_device(struct vdpa_device *vdev)
+{
+ device_unregister(&vdev->dev);
+}
+EXPORT_SYMBOL_GPL(vdpa_unregister_device);
+
+/**
+ * __vdpa_register_driver - register a vDPA device driver
+ * @drv: the vdpa device driver to be registered
+ * @owner: module owner of the driver
+ *
+ * Returns an err when fail to do the registration
+ */
+int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
+{
+ drv->driver.bus = &vdpa_bus;
+ drv->driver.owner = owner;
+
+ return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(__vdpa_register_driver);
+
+/**
+ * vdpa_unregister_driver - unregister a vDPA device driver
+ * @drv: the vdpa device driver to be unregistered
+ */
+void vdpa_unregister_driver(struct vdpa_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
+
+static int vdpa_init(void)
+{
+ return bus_register(&vdpa_bus);
+}
+
+static void __exit vdpa_exit(void)
+{
+ bus_unregister(&vdpa_bus);
+ ida_destroy(&vdpa_index_ida);
+}
+core_initcall(vdpa_init);
+module_exit(vdpa_exit);
+
+MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile
new file mode 100644
index 000000000000..b40278f65e04
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
new file mode 100644
index 000000000000..6e8a0cf2fdeb
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -0,0 +1,629 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA networking device simulator.
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/sysfs.h>
+#include <linux/file.h>
+#include <linux/etherdevice.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/vhost_iotlb.h>
+#include <uapi/linux/virtio_config.h>
+#include <uapi/linux/virtio_net.h>
+
+#define DRV_VERSION "0.1"
+#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
+#define DRV_DESC "vDPA Device Simulator"
+#define DRV_LICENSE "GPL v2"
+
+struct vdpasim_virtqueue {
+ struct vringh vring;
+ struct vringh_kiov iov;
+ unsigned short head;
+ bool ready;
+ u64 desc_addr;
+ u64 device_addr;
+ u64 driver_addr;
+ u32 num;
+ void *private;
+ irqreturn_t (*cb)(void *data);
+};
+
+#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
+#define VDPASIM_QUEUE_MAX 256
+#define VDPASIM_DEVICE_ID 0x1
+#define VDPASIM_VENDOR_ID 0
+#define VDPASIM_VQ_NUM 0x2
+#define VDPASIM_NAME "vdpasim-netdev"
+
+static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
+ (1ULL << VIRTIO_F_VERSION_1) |
+ (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+/* State of each vdpasim device */
+struct vdpasim {
+ struct vdpa_device vdpa;
+ struct vdpasim_virtqueue vqs[2];
+ struct work_struct work;
+ /* spinlock to synchronize virtqueue state */
+ spinlock_t lock;
+ struct virtio_net_config config;
+ struct vhost_iotlb *iommu;
+ void *buffer;
+ u32 status;
+ u32 generation;
+ u64 features;
+};
+
+static struct vdpasim *vdpasim_dev;
+
+static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct vdpasim, vdpa);
+}
+
+static struct vdpasim *dev_to_sim(struct device *dev)
+{
+ struct vdpa_device *vdpa = dev_to_vdpa(dev);
+
+ return vdpa_to_sim(vdpa);
+}
+
+static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
+{
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ int ret;
+
+ ret = vringh_init_iotlb(&vq->vring, vdpasim_features,
+ VDPASIM_QUEUE_MAX, false,
+ (struct vring_desc *)(uintptr_t)vq->desc_addr,
+ (struct vring_avail *)
+ (uintptr_t)vq->driver_addr,
+ (struct vring_used *)
+ (uintptr_t)vq->device_addr);
+}
+
+static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
+{
+ vq->ready = 0;
+ vq->desc_addr = 0;
+ vq->driver_addr = 0;
+ vq->device_addr = 0;
+ vq->cb = NULL;
+ vq->private = NULL;
+ vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX,
+ false, NULL, NULL, NULL);
+}
+
+static void vdpasim_reset(struct vdpasim *vdpasim)
+{
+ int i;
+
+ for (i = 0; i < VDPASIM_VQ_NUM; i++)
+ vdpasim_vq_reset(&vdpasim->vqs[i]);
+
+ vhost_iotlb_reset(vdpasim->iommu);
+
+ vdpasim->features = 0;
+ vdpasim->status = 0;
+ ++vdpasim->generation;
+}
+
+static void vdpasim_work(struct work_struct *work)
+{
+ struct vdpasim *vdpasim = container_of(work, struct
+ vdpasim, work);
+ struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
+ struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
+ size_t read, write, total_write;
+ int err;
+ int pkts = 0;
+
+ spin_lock(&vdpasim->lock);
+
+ if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ goto out;
+
+ if (!txq->ready || !rxq->ready)
+ goto out;
+
+ while (true) {
+ total_write = 0;
+ err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL,
+ &txq->head, GFP_ATOMIC);
+ if (err <= 0)
+ break;
+
+ err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov,
+ &rxq->head, GFP_ATOMIC);
+ if (err <= 0) {
+ vringh_complete_iotlb(&txq->vring, txq->head, 0);
+ break;
+ }
+
+ while (true) {
+ read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov,
+ vdpasim->buffer,
+ PAGE_SIZE);
+ if (read <= 0)
+ break;
+
+ write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov,
+ vdpasim->buffer, read);
+ if (write <= 0)
+ break;
+
+ total_write += write;
+ }
+
+ /* Make sure data is wrote before advancing index */
+ smp_wmb();
+
+ vringh_complete_iotlb(&txq->vring, txq->head, 0);
+ vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
+
+ /* Make sure used is visible before rasing the interrupt. */
+ smp_wmb();
+
+ local_bh_disable();
+ if (txq->cb)
+ txq->cb(txq->private);
+ if (rxq->cb)
+ rxq->cb(rxq->private);
+ local_bh_enable();
+
+ if (++pkts > 4) {
+ schedule_work(&vdpasim->work);
+ goto out;
+ }
+ }
+
+out:
+ spin_unlock(&vdpasim->lock);
+}
+
+static int dir_to_perm(enum dma_data_direction dir)
+{
+ int perm = -EFAULT;
+
+ switch (dir) {
+ case DMA_FROM_DEVICE:
+ perm = VHOST_MAP_WO;
+ break;
+ case DMA_TO_DEVICE:
+ perm = VHOST_MAP_RO;
+ break;
+ case DMA_BIDIRECTIONAL:
+ perm = VHOST_MAP_RW;
+ break;
+ default:
+ break;
+ }
+
+ return perm;
+}
+
+static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ struct vhost_iotlb *iommu = vdpasim->iommu;
+ u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset;
+ int ret, perm = dir_to_perm(dir);
+
+ if (perm < 0)
+ return DMA_MAPPING_ERROR;
+
+ /* For simplicity, use identical mapping to avoid e.g iova
+ * allocator.
+ */
+ ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
+ pa, dir_to_perm(dir));
+ if (ret)
+ return DMA_MAPPING_ERROR;
+
+ return (dma_addr_t)(pa);
+}
+
+static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ struct vhost_iotlb *iommu = vdpasim->iommu;
+
+ vhost_iotlb_del_range(iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+}
+
+static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ struct vhost_iotlb *iommu = vdpasim->iommu;
+ void *addr = kmalloc(size, flag);
+ int ret;
+
+ if (!addr)
+ *dma_addr = DMA_MAPPING_ERROR;
+ else {
+ u64 pa = virt_to_phys(addr);
+
+ ret = vhost_iotlb_add_range(iommu, (u64)pa,
+ (u64)pa + size - 1,
+ pa, VHOST_MAP_RW);
+ if (ret) {
+ *dma_addr = DMA_MAPPING_ERROR;
+ kfree(addr);
+ addr = NULL;
+ } else
+ *dma_addr = (dma_addr_t)pa;
+ }
+
+ return addr;
+}
+
+static void vdpasim_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ struct vhost_iotlb *iommu = vdpasim->iommu;
+
+ vhost_iotlb_del_range(iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+ kfree(phys_to_virt((uintptr_t)dma_addr));
+}
+
+static const struct dma_map_ops vdpasim_dma_ops = {
+ .map_page = vdpasim_map_page,
+ .unmap_page = vdpasim_unmap_page,
+ .alloc = vdpasim_alloc_coherent,
+ .free = vdpasim_free_coherent,
+};
+
+static const struct vdpa_config_ops vdpasim_net_config_ops;
+
+static struct vdpasim *vdpasim_create(void)
+{
+ struct virtio_net_config *config;
+ struct vdpasim *vdpasim;
+ struct device *dev;
+ int ret = -ENOMEM;
+
+ vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL,
+ &vdpasim_net_config_ops);
+ if (!vdpasim)
+ goto err_alloc;
+
+ INIT_WORK(&vdpasim->work, vdpasim_work);
+ spin_lock_init(&vdpasim->lock);
+
+ dev = &vdpasim->vdpa.dev;
+ dev->coherent_dma_mask = DMA_BIT_MASK(64);
+ set_dma_ops(dev, &vdpasim_dma_ops);
+
+ vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
+ if (!vdpasim->iommu)
+ goto err_iommu;
+
+ vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vdpasim->buffer)
+ goto err_iommu;
+
+ config = &vdpasim->config;
+ config->mtu = 1500;
+ config->status = VIRTIO_NET_S_LINK_UP;
+ eth_random_addr(config->mac);
+
+ vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
+ vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
+
+ vdpasim->vdpa.dma_dev = dev;
+ ret = vdpa_register_device(&vdpasim->vdpa);
+ if (ret)
+ goto err_iommu;
+
+ return vdpasim;
+
+err_iommu:
+ put_device(dev);
+err_alloc:
+ return ERR_PTR(ret);
+}
+
+static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
+ u64 desc_area, u64 driver_area,
+ u64 device_area)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->desc_addr = desc_area;
+ vq->driver_addr = driver_area;
+ vq->device_addr = device_area;
+
+ return 0;
+}
+
+static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->num = num;
+}
+
+static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ if (vq->ready)
+ schedule_work(&vdpasim->work);
+}
+
+static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
+ struct vdpa_callback *cb)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->cb = cb->callback;
+ vq->private = cb->private;
+}
+
+static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ spin_lock(&vdpasim->lock);
+ vq->ready = ready;
+ if (vq->ready)
+ vdpasim_queue_ready(vdpasim, idx);
+ spin_unlock(&vdpasim->lock);
+}
+
+static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ return vq->ready;
+}
+
+static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ struct vringh *vrh = &vq->vring;
+
+ spin_lock(&vdpasim->lock);
+ vrh->last_avail_idx = state;
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
+static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ struct vringh *vrh = &vq->vring;
+
+ return vrh->last_avail_idx;
+}
+
+static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa)
+{
+ return VDPASIM_QUEUE_ALIGN;
+}
+
+static u64 vdpasim_get_features(struct vdpa_device *vdpa)
+{
+ return vdpasim_features;
+}
+
+static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ /* DMA mapping must be done by driver */
+ if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ return -EINVAL;
+
+ vdpasim->features = features & vdpasim_features;
+
+ return 0;
+}
+
+static void vdpasim_set_config_cb(struct vdpa_device *vdpa,
+ struct vdpa_callback *cb)
+{
+ /* We don't support config interrupt */
+}
+
+static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa)
+{
+ return VDPASIM_QUEUE_MAX;
+}
+
+static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
+{
+ return VDPASIM_DEVICE_ID;
+}
+
+static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
+{
+ return VDPASIM_VENDOR_ID;
+}
+
+static u8 vdpasim_get_status(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ u8 status;
+
+ spin_lock(&vdpasim->lock);
+ status = vdpasim->status;
+ spin_unlock(&vdpasim->lock);
+
+ return vdpasim->status;
+}
+
+static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ spin_lock(&vdpasim->lock);
+ vdpasim->status = status;
+ if (status == 0)
+ vdpasim_reset(vdpasim);
+ spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (offset + len < sizeof(struct virtio_net_config))
+ memcpy(buf, &vdpasim->config + offset, len);
+}
+
+static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
+ const void *buf, unsigned int len)
+{
+ /* No writable config supportted by vdpasim */
+}
+
+static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->generation;
+}
+
+static int vdpasim_set_map(struct vdpa_device *vdpa,
+ struct vhost_iotlb *iotlb)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vhost_iotlb_map *map;
+ u64 start = 0ULL, last = 0ULL - 1;
+ int ret;
+
+ vhost_iotlb_reset(vdpasim->iommu);
+
+ for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+ map = vhost_iotlb_itree_next(map, start, last)) {
+ ret = vhost_iotlb_add_range(vdpasim->iommu, map->start,
+ map->last, map->addr, map->perm);
+ if (ret)
+ goto err;
+ }
+ return 0;
+
+err:
+ vhost_iotlb_reset(vdpasim->iommu);
+ return ret;
+}
+
+static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
+ u64 pa, u32 perm)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vhost_iotlb_add_range(vdpasim->iommu, iova,
+ iova + size - 1, pa, perm);
+}
+
+static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1);
+
+ return 0;
+}
+
+static void vdpasim_free(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ cancel_work_sync(&vdpasim->work);
+ kfree(vdpasim->buffer);
+ if (vdpasim->iommu)
+ vhost_iotlb_free(vdpasim->iommu);
+}
+
+static const struct vdpa_config_ops vdpasim_net_config_ops = {
+ .set_vq_address = vdpasim_set_vq_address,
+ .set_vq_num = vdpasim_set_vq_num,
+ .kick_vq = vdpasim_kick_vq,
+ .set_vq_cb = vdpasim_set_vq_cb,
+ .set_vq_ready = vdpasim_set_vq_ready,
+ .get_vq_ready = vdpasim_get_vq_ready,
+ .set_vq_state = vdpasim_set_vq_state,
+ .get_vq_state = vdpasim_get_vq_state,
+ .get_vq_align = vdpasim_get_vq_align,
+ .get_features = vdpasim_get_features,
+ .set_features = vdpasim_set_features,
+ .set_config_cb = vdpasim_set_config_cb,
+ .get_vq_num_max = vdpasim_get_vq_num_max,
+ .get_device_id = vdpasim_get_device_id,
+ .get_vendor_id = vdpasim_get_vendor_id,
+ .get_status = vdpasim_get_status,
+ .set_status = vdpasim_set_status,
+ .get_config = vdpasim_get_config,
+ .set_config = vdpasim_set_config,
+ .get_generation = vdpasim_get_generation,
+ .set_map = vdpasim_set_map,
+ .dma_map = vdpasim_dma_map,
+ .dma_unmap = vdpasim_dma_unmap,
+ .free = vdpasim_free,
+};
+
+static int __init vdpasim_dev_init(void)
+{
+ vdpasim_dev = vdpasim_create();
+
+ if (!IS_ERR(vdpasim_dev))
+ return 0;
+
+ return PTR_ERR(vdpasim_dev);
+}
+
+static void __exit vdpasim_dev_exit(void)
+{
+ struct vdpa_device *vdpa = &vdpasim_dev->vdpa;
+
+ vdpa_unregister_device(vdpa);
+}
+
+module_init(vdpasim_dev_init)
+module_exit(vdpasim_dev_exit)
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);