diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2019-10-09 18:09:30 +0200 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2019-10-28 20:41:14 +0100 |
commit | 3389baa831b6a09e3c96e2a6283a1b952be2f0cd (patch) | |
tree | 841feb907161e0aa96df8066ddb96d3d8e063609 /drivers | |
parent | RDMA/mlx5: Use an xarray for the children of an implicit ODP (diff) | |
download | linux-3389baa831b6a09e3c96e2a6283a1b952be2f0cd.tar.xz linux-3389baa831b6a09e3c96e2a6283a1b952be2f0cd.zip |
RDMA/mlx5: Reduce locking in implicit_mr_get_data()
Now that the child MRs are stored in an xarray we can rely on the SRCU
lock to protect the xa_load and use xa_cmpxchg on the slow allocation path
to resolve races with concurrent page faults.
This reduces the scope of the critical section of umem_mutex for implicit
MRs to only cover mlx5_ib_update_xlt, and avoids taking a lock at all if
the child MR is already in the xarray. This makes it consistent with the
normal ODP MR critical section for umem_mutex, and the locking approach
used for destroying an unused implicit child MR.
The MLX5_IB_UPD_XLT_ATOMIC is no longer needed in implicit_get_child_mr()
since it is no longer called with any locks held.
Link: https://lore.kernel.org/r/20191009160934.3143-11-jgg@ziepe.ca
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 6f7eea175c72..00e14b6acd98 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -381,8 +381,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); + MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); goto out_release; @@ -392,9 +391,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); - if (err) { - ret = ERR_PTR(err); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + if (unlikely(ret)) { + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto out_release; + } + /* + * Another thread beat us to creating the child mr, use + * theirs. + */ goto out_release; } @@ -424,7 +430,8 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *result = NULL; int ret; - mutex_lock(&odp_imr->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); + for (idx = idx; idx <= end_idx; idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -450,20 +457,27 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, */ out: if (likely(!inv_len)) - goto out_unlock; + return result; + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_leaves is updated, so a parallel page fault could see + * a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. 
The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); if (ret) { mlx5_ib_err(to_mdev(imr->ibmr.pd->device), "Failed to update PAS\n"); - result = ERR_PTR(ret); - goto out_unlock; + return ERR_PTR(ret); } - -out_unlock: - mutex_unlock(&odp_imr->umem_mutex); return result; } |