summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2020-11-30 08:58:38 +0100
committerJason Gunthorpe <jgg@nvidia.com>2020-12-07 19:06:23 +0100
commit38f8ff5b4438876a7d5b2f8b54eb46c7d5154457 (patch)
tree755c66fcf271fe19f560cb809c2ca3bf19056872
parentRDMA/uverbs: Allow drivers to create a new HW object during rereg_mr (diff)
downloadlinux-38f8ff5b4438876a7d5b2f8b54eb46c7d5154457.tar.xz
linux-38f8ff5b4438876a7d5b2f8b54eb46c7d5154457.zip
RDMA/mlx5: Reorganize mlx5_ib_reg_user_mr()
This function handles an ODP and regular MR flow all mushed together, even though the two flows are quite different. Split them into two dedicated functions. Link: https://lore.kernel.org/r/20201130075839.278575-5-leon@kernel.org Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c249
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c16
3 files changed, 140 insertions, 129 deletions
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ab84d4efbda3..fac495e7834e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1340,7 +1340,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1362,7 +1362,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
-static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 778cc08e17ad..8fb8f8a2c72e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -56,6 +56,10 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags, unsigned int page_size,
+ bool populate);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -875,32 +879,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start,
- u64 length, int access_flags)
-{
- struct ib_umem *u;
-
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_umem_odp *odp;
-
- odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
- &mlx5_mn_ops);
- if (IS_ERR(odp)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
- PTR_ERR(odp));
- return ERR_CAST(odp);
- }
- return &odp->umem;
- }
-
- u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
- if (IS_ERR(u)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
- return u;
- }
- return u;
-}
-
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -957,9 +935,18 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
return &cache->ent[order];
}
-static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
- struct ib_umem *umem, u64 iova,
- int access_flags)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags)
+{
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->access_flags = access_flags;
+}
+
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent;
@@ -971,16 +958,26 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
ent = mr_cache_ent_from_order(
dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
- if (!ent)
- return ERR_PTR(-E2BIG);
-
- /* Matches access in alloc_cache_mr() */
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
- return ERR_PTR(-EOPNOTSUPP);
+ /*
+ * Matches access in alloc_cache_mr(). If the MR can't come from the
+ * cache then synchronously create an uncached one.
+ */
+ if (!ent || ent->limit == 0 ||
+ !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(NULL, pd, umem, iova, access_flags, page_size,
+ false);
+ mutex_unlock(&dev->slow_path_mutex);
+ return mr;
+ }
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
+ /*
+ * The above already tried to do the same stuff as reg_create(),
+ * no reason to try it again.
+ */
if (IS_ERR(mr))
return mr;
}
@@ -993,6 +990,8 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
mr->page_shift = order_base_2(page_size);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
}
@@ -1279,10 +1278,10 @@ err:
*/
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
- int access_flags, bool populate)
+ int access_flags, unsigned int page_size,
+ bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- unsigned int page_size;
struct mlx5_ib_mr *mr;
__be64 *pas;
void *mkc;
@@ -1291,11 +1290,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- page_size =
- mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
- if (WARN_ON(!page_size))
- return ERR_PTR(-EINVAL);
-
+ if (!page_size) {
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
+ 0, iova);
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ }
mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -1352,6 +1352,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
mr->dev = dev;
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1368,15 +1370,6 @@ err_1:
return ERR_PTR(err);
}
-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- u64 length, int access_flags)
-{
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
-}
-
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
@@ -1471,70 +1464,32 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
- struct ib_umem *umem;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
-
- xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
- /* ODP requires xlt update via umr to work. */
- if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
- return ERR_PTR(-EINVAL);
-
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
- length == U64_MAX) {
- if (virt_addr != start)
- return ERR_PTR(-EINVAL);
- if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
- !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return ERR_PTR(-EINVAL);
-
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
- return &mr->ibmr;
- }
-
- umem = mr_umem_get(dev, start, length, access_flags);
- if (IS_ERR(umem))
- return ERR_CAST(umem);
-
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags);
- if (IS_ERR(mr))
- mr = NULL;
- }
-
- if (!mr) {
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ } else {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, umem, virt_addr, access_flags,
- !xlt_with_umr);
+ mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true);
mutex_unlock(&dev->slow_path_mutex);
}
-
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- mr->umem = umem;
- atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
- set_mr_fields(dev, mr, length, access_flags);
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (xlt_with_umr) {
/*
* If the MR was created with reg_create then it will be
* configured properly but left disabled. It is safe to go ahead
@@ -1546,32 +1501,88 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
}
+ return &mr->ibmr;
+}
- if (is_odp_mr(mr)) {
- to_ib_umem_odp(mr->umem)->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
- GFP_KERNEL));
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ int err;
- err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
}
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
+ }
+
+ odp->private = mr;
+ init_waitqueue_head(&mr->q_deferred_work);
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL));
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1661,7 +1672,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
atomic_sub(ib_umem_num_pages(mr->umem),
&dev->mdev->priv.reg_pages);
ib_umem_release(mr->umem);
- mr->umem = mr_umem_get(dev, addr, len, access_flags);
+ mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
mr->umem = NULL;
@@ -1685,7 +1696,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (err)
goto err;
- mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true);
+ mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
mr = to_mmr(ib_mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 5c853ec1b0d8..f4a28a012187 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -536,6 +536,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
+ if (!mlx5_ib_can_load_pas_with_umr(dev,
+ MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+ return ERR_PTR(-EOPNOTSUPP);
+
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
@@ -831,17 +835,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
flags);
}
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
- u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
int ret;
- if (enable)
- flags |= MLX5_PF_FLAGS_ENABLE;
-
- ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
- mr->umem->address, mr->umem->length, NULL,
- flags);
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+ mr->umem->length, NULL,
+ MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
return ret >= 0 ? 0 : ret;
}