summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci/mlx5/main.c
diff options
context:
space:
mode:
authorYishai Hadas <yishaih@nvidia.com>2023-01-24 15:49:54 +0100
committerAlex Williamson <alex.williamson@redhat.com>2023-01-30 20:16:15 +0100
commitb04e2e86e919633825728007484508dbdc44c7bb (patch)
tree4c93d401dad36863e45aa6690f5788ae21f5c7c6 /drivers/vfio/pci/mlx5/main.c
parentvfio/mlx5: Check whether VF is migratable (diff)
downloadlinux-b04e2e86e919633825728007484508dbdc44c7bb.tar.xz
linux-b04e2e86e919633825728007484508dbdc44c7bb.zip
vfio/mlx5: Improve the source side flow upon pre_copy
Improve the source side flow upon pre_copy as of below. - Prepare the stop_copy buffers as part of moving to pre_copy. - Send to the target a record that includes the expected stop_copy size to let it optimize its stop_copy flow as well. As for sending the target this new record type (i.e. MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE) we split the current 64 header flags bits into 32 flags bits and another 32 tag bits, each record may have a tag and a flag whether it's optional or mandatory. Optional records will be ignored in the target. The above reduces the downtime upon stop_copy as the relevant data stuff is prepared ahead as part of pre_copy. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20230124144955.139901-3-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio/pci/mlx5/main.c')
-rw-r--r--drivers/vfio/pci/mlx5/main.c133
1 files changed, 110 insertions, 23 deletions
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 7ba127d8889a..6856e7b97533 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -304,6 +304,87 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
wake_up_interruptible(&migf->poll_wait);
}
+static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
+{
+ size_t size = sizeof(struct mlx5_vf_migration_header) +
+ sizeof(struct mlx5_vf_migration_tag_stop_copy_data);
+ struct mlx5_vf_migration_tag_stop_copy_data data = {};
+ struct mlx5_vhca_data_buffer *header_buf = NULL;
+ struct mlx5_vf_migration_header header = {};
+ unsigned long flags;
+ struct page *page;
+ u8 *to_buff;
+ int ret;
+
+ header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE);
+ if (IS_ERR(header_buf))
+ return PTR_ERR(header_buf);
+
+ header.record_size = cpu_to_le64(sizeof(data));
+ header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL);
+ header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE);
+ page = mlx5vf_get_migration_page(header_buf, 0);
+ if (!page) {
+ ret = -EINVAL;
+ goto err;
+ }
+ to_buff = kmap_local_page(page);
+ memcpy(to_buff, &header, sizeof(header));
+ header_buf->length = sizeof(header);
+ data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length);
+ memcpy(to_buff + sizeof(header), &data, sizeof(data));
+ header_buf->length += sizeof(data);
+ kunmap_local(to_buff);
+ header_buf->start_pos = header_buf->migf->max_pos;
+ migf->max_pos += header_buf->length;
+ spin_lock_irqsave(&migf->list_lock, flags);
+ list_add_tail(&header_buf->buf_elm, &migf->buf_list);
+ spin_unlock_irqrestore(&migf->list_lock, flags);
+ migf->pre_copy_initial_bytes = size;
+ return 0;
+err:
+ mlx5vf_put_data_buffer(header_buf);
+ return ret;
+}
+
+static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf,
+ size_t state_size)
+{
+ struct mlx5_vhca_data_buffer *buf;
+ size_t inc_state_size;
+ int ret;
+
+ /* let's be ready for stop_copy size that might grow by 10 percents */
+ if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
+ inc_state_size = state_size;
+
+ buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ migf->buf = buf;
+ buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
+
+ migf->buf_header = buf;
+ ret = mlx5vf_add_stop_copy_header(migf);
+ if (ret)
+ goto err_header;
+ return 0;
+
+err_header:
+ mlx5vf_put_data_buffer(migf->buf_header);
+ migf->buf_header = NULL;
+err:
+ mlx5vf_put_data_buffer(migf->buf);
+ migf->buf = NULL;
+ return ret;
+}
+
static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -314,7 +395,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
loff_t *pos = &filp->f_pos;
unsigned long minsz;
size_t inc_length = 0;
- bool end_of_data;
+ bool end_of_data = false;
int ret;
if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
@@ -358,25 +439,19 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
goto err_migf_unlock;
}
- buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
- if (buf) {
- if (buf->start_pos == 0) {
- info.initial_bytes = buf->header_image_size - *pos;
- } else if (buf->start_pos ==
- sizeof(struct mlx5_vf_migration_header)) {
- /* First data buffer following the header */
- info.initial_bytes = buf->start_pos +
- buf->length - *pos;
- } else {
- info.dirty_bytes = buf->start_pos + buf->length - *pos;
- }
+ if (migf->pre_copy_initial_bytes > *pos) {
+ info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
} else {
- if (!end_of_data) {
- ret = -EINVAL;
- goto err_migf_unlock;
+ buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+ if (buf) {
+ info.dirty_bytes = buf->start_pos + buf->length - *pos;
+ } else {
+ if (!end_of_data) {
+ ret = -EINVAL;
+ goto err_migf_unlock;
+ }
+ info.dirty_bytes = inc_length;
}
-
- info.dirty_bytes = inc_length;
}
if (!end_of_data || !inc_length) {
@@ -441,10 +516,16 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
if (ret)
goto err;
- buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto err;
+ /* Checking whether we have a matching pre-allocated buffer that can fit */
+ if (migf->buf && migf->buf->allocated_length >= length) {
+ buf = migf->buf;
+ migf->buf = NULL;
+ } else {
+ buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
}
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
@@ -503,6 +584,12 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
if (ret)
goto out_pd;
+ if (track) {
+ ret = mlx5vf_prep_stop_copy(migf, length);
+ if (ret)
+ goto out_pd;
+ }
+
buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
@@ -516,7 +603,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
out_save:
mlx5vf_free_data_buffer(buf);
out_pd:
- mlx5vf_cmd_dealloc_pd(migf);
+ mlx5fv_cmd_clean_migf_resources(migf);
out_free:
fput(migf->filp);
end: