summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci/pds/cmds.c
diff options
context:
space:
mode:
authorBrett Creeley <brett.creeley@amd.com>2023-08-07 22:57:52 +0200
committerAlex Williamson <alex.williamson@redhat.com>2023-08-16 18:53:26 +0200
commitbb500dbe2ac622551d98c0bb2735a68f59489c98 (patch)
treeece715381ff3a196ce879f74cee292df6ace28ac /drivers/vfio/pci/pds/cmds.c
parentvfio/pds: register with the pds_core PF (diff)
downloadlinux-bb500dbe2ac622551d98c0bb2735a68f59489c98.tar.xz
linux-bb500dbe2ac622551d98c0bb2735a68f59489c98.zip
vfio/pds: Add VFIO live migration support
Add live migration support via the VFIO subsystem. The migration implementation aligns with the definition from uapi/vfio.h and uses the pds_core PF's adminq for device configuration. The ability to suspend, resume, and transfer VF device state data is included along with the required admin queue command structures and implementations. PDS_LM_CMD_SUSPEND and PDS_LM_CMD_SUSPEND_STATUS are added to support the VF device suspend operation. PDS_LM_CMD_RESUME is added to support the VF device resume operation. PDS_LM_CMD_STATE_SIZE is added to determine the exact size of the VF device state data. PDS_LM_CMD_SAVE is added to get the VF device state data. PDS_LM_CMD_RESTORE is added to restore the VF device with the previously saved data from PDS_LM_CMD_SAVE. PDS_LM_CMD_HOST_VF_STATUS is added to notify the DSC/firmware when a migration is in/not-in progress from the host's perspective. The DSC/firmware can use this to clear/setup any necessary state related to a migration. Signed-off-by: Brett Creeley <brett.creeley@amd.com> Signed-off-by: Shannon Nelson <shannon.nelson@amd.com> Reviewed-by: Simon Horman <horms@kernel.org> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/20230807205755.29579-6-brett.creeley@amd.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio/pci/pds/cmds.c')
-rw-r--r--drivers/vfio/pci/pds/cmds.c330
1 files changed, 330 insertions, 0 deletions
diff --git a/drivers/vfio/pci/pds/cmds.c b/drivers/vfio/pci/pds/cmds.c
index 7f1618aa765d..b1c5f103500f 100644
--- a/drivers/vfio/pci/pds/cmds.c
+++ b/drivers/vfio/pci/pds/cmds.c
@@ -3,6 +3,7 @@
#include <linux/io.h>
#include <linux/types.h>
+#include <linux/delay.h>
#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
@@ -11,6 +12,37 @@
#include "vfio_dev.h"
#include "cmds.h"
+#define SUSPEND_TIMEOUT_S 5
+#define SUSPEND_CHECK_INTERVAL_MS 1
+
+static int pds_vfio_client_adminq_cmd(struct pds_vfio_pci_device *pds_vfio,
+ union pds_core_adminq_cmd *req,
+ union pds_core_adminq_comp *resp,
+ bool fast_poll)
+{
+ struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
+ union pds_core_adminq_cmd cmd = {};
+ struct pdsc *pdsc;
+ int err;
+
+ /* Wrap the client request */
+ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD;
+ cmd.client_request.client_id = cpu_to_le16(pds_vfio->client_id);
+ memcpy(cmd.client_request.client_cmd, req,
+ sizeof(cmd.client_request.client_cmd));
+
+ pdsc = pdsc_get_pf_struct(pdev);
+ if (IS_ERR(pdsc))
+ return PTR_ERR(pdsc);
+
+ err = pdsc_adminq_post(pdsc, &cmd, resp, fast_poll);
+ if (err && err != -EAGAIN)
+ dev_err(pds_vfio_to_dev(pds_vfio),
+ "client admin cmd failed: %pe\n", ERR_PTR(err));
+
+ return err;
+}
+
int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio)
{
struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
@@ -52,3 +84,301 @@ void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio)
pds_vfio->client_id = 0;
}
+
+static int
+pds_vfio_suspend_wait_device_cmd(struct pds_vfio_pci_device *pds_vfio)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_suspend_status = {
+ .opcode = PDS_LM_CMD_SUSPEND_STATUS,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ },
+ };
+ struct device *dev = pds_vfio_to_dev(pds_vfio);
+ union pds_core_adminq_comp comp = {};
+ unsigned long time_limit;
+ unsigned long time_start;
+ unsigned long time_done;
+ int err;
+
+ time_start = jiffies;
+ time_limit = time_start + HZ * SUSPEND_TIMEOUT_S;
+ do {
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
+ if (err != -EAGAIN)
+ break;
+
+ msleep(SUSPEND_CHECK_INTERVAL_MS);
+ } while (time_before(jiffies, time_limit));
+
+ time_done = jiffies;
+ dev_dbg(dev, "%s: vf%u: Suspend comp received in %d msecs\n", __func__,
+ pds_vfio->vf_id, jiffies_to_msecs(time_done - time_start));
+
+ /* Check the results */
+ if (time_after_eq(time_done, time_limit)) {
+ dev_err(dev, "%s: vf%u: Suspend comp timeout\n", __func__,
+ pds_vfio->vf_id);
+ err = -ETIMEDOUT;
+ }
+
+ return err;
+}
+
+int pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_suspend = {
+ .opcode = PDS_LM_CMD_SUSPEND,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ .type = type,
+ },
+ };
+ struct device *dev = pds_vfio_to_dev(pds_vfio);
+ union pds_core_adminq_comp comp = {};
+ int err;
+
+ dev_dbg(dev, "vf%u: Suspend device\n", pds_vfio->vf_id);
+
+ /*
+ * The initial suspend request to the firmware starts the device suspend
+ * operation and the firmware returns success if it's started
+ * successfully.
+ */
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
+ if (err) {
+ dev_err(dev, "vf%u: Suspend failed: %pe\n", pds_vfio->vf_id,
+ ERR_PTR(err));
+ return err;
+ }
+
+ /*
+ * The subsequent suspend status request(s) check if the firmware has
+ * completed the device suspend process.
+ */
+ return pds_vfio_suspend_wait_device_cmd(pds_vfio);
+}
+
+int pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_resume = {
+ .opcode = PDS_LM_CMD_RESUME,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ .type = type,
+ },
+ };
+ struct device *dev = pds_vfio_to_dev(pds_vfio);
+ union pds_core_adminq_comp comp = {};
+
+ dev_dbg(dev, "vf%u: Resume device\n", pds_vfio->vf_id);
+
+ return pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
+}
+
+int pds_vfio_get_lm_state_size_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_state_size = {
+ .opcode = PDS_LM_CMD_STATE_SIZE,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ },
+ };
+ struct device *dev = pds_vfio_to_dev(pds_vfio);
+ union pds_core_adminq_comp comp = {};
+ int err;
+
+ dev_dbg(dev, "vf%u: Get migration status\n", pds_vfio->vf_id);
+
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
+ if (err)
+ return err;
+
+ *size = le64_to_cpu(comp.lm_state_size.size);
+ return 0;
+}
+
+static int pds_vfio_dma_map_lm_file(struct device *dev,
+ enum dma_data_direction dir,
+ struct pds_vfio_lm_file *lm_file)
+{
+ struct pds_lm_sg_elem *sgl, *sge;
+ struct scatterlist *sg;
+ dma_addr_t sgl_addr;
+ size_t sgl_size;
+ int err;
+ int i;
+
+ if (!lm_file)
+ return -EINVAL;
+
+ /* dma map file pages */
+ err = dma_map_sgtable(dev, &lm_file->sg_table, dir, 0);
+ if (err)
+ return err;
+
+ lm_file->num_sge = lm_file->sg_table.nents;
+
+ /* alloc sgl */
+ sgl_size = lm_file->num_sge * sizeof(struct pds_lm_sg_elem);
+ sgl = kzalloc(sgl_size, GFP_KERNEL);
+ if (!sgl) {
+ err = -ENOMEM;
+ goto out_unmap_sgtable;
+ }
+
+ /* fill sgl */
+ sge = sgl;
+ for_each_sgtable_dma_sg(&lm_file->sg_table, sg, i) {
+ sge->addr = cpu_to_le64(sg_dma_address(sg));
+ sge->len = cpu_to_le32(sg_dma_len(sg));
+ dev_dbg(dev, "addr = %llx, len = %u\n", sge->addr, sge->len);
+ sge++;
+ }
+
+ sgl_addr = dma_map_single(dev, sgl, sgl_size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, sgl_addr)) {
+ err = -EIO;
+ goto out_free_sgl;
+ }
+
+ lm_file->sgl = sgl;
+ lm_file->sgl_addr = sgl_addr;
+
+ return 0;
+
+out_free_sgl:
+ kfree(sgl);
+out_unmap_sgtable:
+ lm_file->num_sge = 0;
+ dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
+ return err;
+}
+
+static void pds_vfio_dma_unmap_lm_file(struct device *dev,
+ enum dma_data_direction dir,
+ struct pds_vfio_lm_file *lm_file)
+{
+ if (!lm_file)
+ return;
+
+ /* free sgl */
+ if (lm_file->sgl) {
+ dma_unmap_single(dev, lm_file->sgl_addr,
+ lm_file->num_sge * sizeof(*lm_file->sgl),
+ DMA_TO_DEVICE);
+ kfree(lm_file->sgl);
+ lm_file->sgl = NULL;
+ lm_file->sgl_addr = DMA_MAPPING_ERROR;
+ lm_file->num_sge = 0;
+ }
+
+ /* dma unmap file pages */
+ dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
+}
+
+int pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_save = {
+ .opcode = PDS_LM_CMD_SAVE,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ },
+ };
+ struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
+ struct device *pdsc_dev = &pci_physfn(pdev)->dev;
+ union pds_core_adminq_comp comp = {};
+ struct pds_vfio_lm_file *lm_file;
+ int err;
+
+ dev_dbg(&pdev->dev, "vf%u: Get migration state\n", pds_vfio->vf_id);
+
+ lm_file = pds_vfio->save_file;
+
+ err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);
+ if (err) {
+ dev_err(&pdev->dev, "failed to map save migration file: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ cmd.lm_save.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
+ cmd.lm_save.num_sge = cpu_to_le32(lm_file->num_sge);
+
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
+ if (err)
+ dev_err(&pdev->dev, "failed to get migration state: %pe\n",
+ ERR_PTR(err));
+
+ pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);
+
+ return err;
+}
+
+int pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_restore = {
+ .opcode = PDS_LM_CMD_RESTORE,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ },
+ };
+ struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
+ struct device *pdsc_dev = &pci_physfn(pdev)->dev;
+ union pds_core_adminq_comp comp = {};
+ struct pds_vfio_lm_file *lm_file;
+ int err;
+
+ dev_dbg(&pdev->dev, "vf%u: Set migration state\n", pds_vfio->vf_id);
+
+ lm_file = pds_vfio->restore_file;
+
+ err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);
+ if (err) {
+ dev_err(&pdev->dev,
+ "failed to map restore migration file: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ cmd.lm_restore.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
+ cmd.lm_restore.num_sge = cpu_to_le32(lm_file->num_sge);
+
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
+ if (err)
+ dev_err(&pdev->dev, "failed to set migration state: %pe\n",
+ ERR_PTR(err));
+
+ pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);
+
+ return err;
+}
+
+void pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio,
+ enum pds_lm_host_vf_status vf_status)
+{
+ union pds_core_adminq_cmd cmd = {
+ .lm_host_vf_status = {
+ .opcode = PDS_LM_CMD_HOST_VF_STATUS,
+ .vf_id = cpu_to_le16(pds_vfio->vf_id),
+ .status = vf_status,
+ },
+ };
+ struct device *dev = pds_vfio_to_dev(pds_vfio);
+ union pds_core_adminq_comp comp = {};
+ int err;
+
+ dev_dbg(dev, "vf%u: Set host VF LM status: %u", pds_vfio->vf_id,
+ vf_status);
+ if (vf_status != PDS_LM_STA_IN_PROGRESS &&
+ vf_status != PDS_LM_STA_NONE) {
+ dev_warn(dev, "Invalid host VF migration status, %d\n",
+ vf_status);
+ return;
+ }
+
+ err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
+ if (err)
+ dev_warn(dev, "failed to send host VF migration status: %pe\n",
+ ERR_PTR(err));
+}