summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoao Martins <joao.m.martins@oracle.com>2023-10-24 15:51:00 +0200
committerJason Gunthorpe <jgg@nvidia.com>2023-10-24 16:58:43 +0200
commit609848132c71316df3260d1ec066539c21bba585 (patch)
treebad3cee2b2323624ef87cd2048d8fe48ada9e2b3
parentiommufd: Add capabilities to IOMMU_GET_HW_INFO (diff)
downloadlinux-609848132c71316df3260d1ec066539c21bba585.tar.xz
linux-609848132c71316df3260d1ec066539c21bba585.zip
iommufd: Add a flag to skip clearing of IOPTE dirty
VFIO has an operation where it unmaps an IOVA while returning a bitmap with the dirty data. In reality the operation doesn't quite query the IO pagetables that the PTE was dirty or not. Instead it marks as dirty on anything that was mapped, and doing so in one syscall. In IOMMUFD the equivalent is done in two operations by querying with GET_DIRTY_IOVA followed by UNMAP_IOVA. However, this would incur two TLB flushes given that after clearing dirty bits IOMMU implementations require invalidating their IOTLB, plus another invalidation needed for the UNMAP. To allow dirty bits to be queried faster, add a flag (IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR) that requests to not clear the dirty bits from the PTE (but just reading them), under the expectation that the next operation is the unmap. An alternative is to unmap and just perpectually mark as dirty as that's the same behaviour as today. So here equivalent functionally can be provided with unmap alone, and if real dirty info is required it will amortize the cost while querying. There's still a race against DMA where in theory the unmap of the IOVA (when the guest invalidates the IOTLB via emulated iommu) would race against the VF performing DMA on the same IOVA. As discussed in [0], we are accepting to resolve this race as throwing away the DMA and it doesn't matter if it hit physical DRAM or not, the VM can't tell if we threw it away because the DMA was blocked or because we failed to copy the DRAM. [0] https://lore.kernel.org/linux-iommu/20220502185239.GR8364@nvidia.com/ Link: https://lore.kernel.org/r/20231024135109.73787-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c3
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c9
-rw-r--r--include/uapi/linux/iommufd.h15
3 files changed, 23 insertions, 4 deletions
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 7316f69110ef..72a5269984b0 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -228,7 +228,8 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
struct iommufd_ioas *ioas;
int rc = -EOPNOTSUPP;
- if ((cmd->flags || cmd->__reserved))
+ if ((cmd->flags & ~(IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR)) ||
+ cmd->__reserved)
return -EOPNOTSUPP;
hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 255264e796fb..9f060abe53b6 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -414,6 +414,7 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
}
struct iova_bitmap_fn_arg {
+ unsigned long flags;
struct io_pagetable *iopt;
struct iommu_domain *domain;
struct iommu_dirty_bitmap *dirty;
@@ -430,13 +431,14 @@ static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
struct iommu_dirty_bitmap *dirty = arg->dirty;
const struct iommu_dirty_ops *ops = domain->dirty_ops;
unsigned long last_iova = iova + length - 1;
+ unsigned long flags = arg->flags;
int ret;
iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
unsigned long last = min(last_iova, iopt_area_last_iova(area));
ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
- last - iter.cur_iova + 1, 0,
+ last - iter.cur_iova + 1, flags,
dirty);
if (ret)
return ret;
@@ -470,12 +472,15 @@ iommu_read_and_clear_dirty(struct iommu_domain *domain,
iommu_dirty_bitmap_init(&dirty, iter, &gather);
+ arg.flags = flags;
arg.iopt = iopt;
arg.domain = domain;
arg.dirty = &dirty;
iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);
- iommu_iotlb_sync(domain, &gather);
+ if (!(flags & IOMMU_DIRTY_NO_CLEAR))
+ iommu_iotlb_sync(domain, &gather);
+
iova_bitmap_free(iter);
return ret;
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 036ebc6c19cf..c44eecf5d318 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -501,10 +501,23 @@ struct iommu_hwpt_set_dirty_tracking {
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
/**
+ * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
+ * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
+ * any dirty bits metadata. This flag
+ * can be passed in the expectation
+ * where the next operation is an unmap
+ * of the same IOVA range.
+ *
+ */
+enum iommufd_hwpt_get_dirty_bitmap_flags {
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
+};
+
+/**
* struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
* @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
* @hwpt_id: HW pagetable ID that represents the IOMMU domain
- * @flags: Must be zero
+ * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
* @__reserved: Must be 0
* @iova: base IOVA of the bitmap first bit
* @length: IOVA range size