diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-30 19:33:14 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-30 19:33:14 +0100 |
commit | aa32f1169148beb90d71494e2f2a1999ba7b5366 (patch) | |
tree | ec8c434bff07bf0beb2df08629089824927f62f9 /drivers/infiniband/hw/mlx5 | |
parent | Merge tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/d... (diff) | |
parent | mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap (diff) | |
download | linux-aa32f1169148beb90d71494e2f2a1999ba7b5366.tar.xz linux-aa32f1169148beb90d71494e2f2a1999ba7b5366.zip |
Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull hmm updates from Jason Gunthorpe:
"This is another round of bug fixing and cleanup. This time the focus
is on the driver pattern to use mmu notifiers to monitor a VA range.
This code is lifted out of many drivers and hmm_mirror directly into
the mmu_notifier core and written using the best ideas from all the
driver implementations.
This removes many bugs from the drivers and has a very pleasing
diffstat. More drivers can still be converted, but that is for another
cycle.
- A shared branch with RDMA reworking the RDMA ODP implementation
- New mmu_interval_notifier API. This is focused on the use case of
monitoring a VA and simplifies the process for drivers
- A common seq-count locking scheme built into the
mmu_interval_notifier API usable by drivers that call
get_user_pages() or hmm_range_fault() with the VA range
- Conversion of mlx5 ODP, hfi1, radeon, nouveau, AMD GPU, and Xen
GntDev drivers to the new API. This deletes a lot of wonky driver
code.
- Two improvements for hmm_range_fault(), from testing done by Ralph"
* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap
mm/hmm: make full use of walk_page_range()
xen/gntdev: use mmu_interval_notifier_insert
mm/hmm: remove hmm_mirror and related
drm/amdgpu: Use mmu_interval_notifier instead of hmm_mirror
drm/amdgpu: Use mmu_interval_insert instead of hmm_mirror
drm/amdgpu: Call find_vma under mmap_sem
nouveau: use mmu_interval_notifier instead of hmm_mirror
nouveau: use mmu_notifier directly for invalidate_range_start
drm/radeon: use mmu_interval_notifier_insert
RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv
RDMA/odp: Use mmu_interval_notifier_insert()
mm/hmm: define the pre-processor related parts of hmm.h even if disabled
mm/hmm: allow hmm_range to be used with a mmu_interval_notifier or hmm_mirror
mm/mmu_notifier: add an interval tree notifier
mm/mmu_notifier: define the header pre-processor parts even if disabled
mm/hmm: allow snapshot of the special zero page
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 50 |
3 files changed, 27 insertions, 33 deletions
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 03af54b53bf7..5986953ec2fa 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1258,8 +1258,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, - unsigned long end); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1289,11 +1287,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, - unsigned long start, - unsigned long end){}; #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +extern const struct mmu_interval_notifier_ops mlx5_mn_ops; + /* Needed for rep profile */ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 60d39b9ec41c..ea8bfc3e2d8d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -749,7 +749,8 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (access_flags & IB_ACCESS_ON_DEMAND) { struct ib_umem_odp *odp; - odp = ib_umem_odp_get(udata, start, length, access_flags); + odp = ib_umem_odp_get(udata, start, length, access_flags, + &mlx5_mn_ops); if (IS_ERR(odp)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(odp)); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 45ee40c2f36e..f924250f80c2 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -241,18 +241,27 @@ out_unlock: xa_unlock(&imr->implicit_children); } -void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, - unsigned long end) +static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { + struct ib_umem_odp *umem_odp = + container_of(mni, struct ib_umem_odp, notifier); struct mlx5_ib_mr *mr; const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; u64 invalidations = 0; + unsigned long start; + unsigned long end; int in_block = 0; u64 addr; + if (!mmu_notifier_range_blockable(range)) + return false; + mutex_lock(&umem_odp->umem_mutex); + mmu_interval_set_seq(mni, cur_seq); /* * If npages is zero then umem_odp->private may not be setup yet. This * does not complete until after the first page is mapped for DMA. @@ -261,8 +270,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, goto out; mr = umem_odp->private; - start = max_t(u64, ib_umem_start(umem_odp), start); - end = min_t(u64, ib_umem_end(umem_odp), end); + start = max_t(u64, ib_umem_start(umem_odp), range->start); + end = min_t(u64, ib_umem_end(umem_odp), range->end); /* * Iteration one - zap the HW's MTTs. The notifiers_count ensures that @@ -319,8 +328,13 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, destroy_unused_implicit_child_mr(mr); out: mutex_unlock(&umem_odp->umem_mutex); + return true; } +const struct mmu_interval_notifier_ops mlx5_mn_ops = { + .invalidate = mlx5_ib_invalidate_range, +}; + void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { struct ib_odp_caps *caps = &dev->odp_caps; @@ -419,7 +433,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), idx * MLX5_IMR_MTT_SIZE, - MLX5_IMR_MTT_SIZE); + MLX5_IMR_MTT_SIZE, &mlx5_mn_ops); if (IS_ERR(odp)) return ERR_CAST(odp); @@ -606,8 +620,9 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, u32 flags) { - int current_seq, page_shift, ret, np; + int page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; + unsigned long current_seq; u64 access_mask; u64 start_idx, page_mask; @@ -619,12 +634,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; - current_seq = READ_ONCE(odp->notifiers_seq); - /* - * Ensure the sequence number is valid for some time before we call - * gup. - */ - smp_rmb(); + current_seq = mmu_interval_read_begin(&odp->notifier); np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, current_seq); @@ -632,7 +642,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, return np; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { + if (!mmu_interval_read_retry(&odp->notifier, current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -662,19 +672,6 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, return np << (page_shift - PAGE_SHIFT); out: - if (ret == -EAGAIN) { - unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); - - if (!wait_for_completion_timeout(&odp->notifier_completion, - timeout)) { - mlx5_ib_warn( - mr->dev, - "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", - current_seq, odp->notifiers_seq, - odp->notifiers_count); - } - } - return ret; } @@ -1622,7 +1619,6 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, - .invalidate_range = mlx5_ib_invalidate_range, }; int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) |