diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 644 |
1 files changed, 297 insertions, 347 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8f608debe141..b8f9382a8b7d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -46,14 +46,10 @@ enum { }; #define MLX5_UMR_ALIGN 2048 -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static __be64 mlx5_ib_update_mtt_emergency_buffer[ - MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)] - __aligned(MLX5_UMR_ALIGN); -static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex); -#endif static int clean_mr(struct mlx5_ib_mr *mr); +static int use_umr(struct mlx5_ib_dev *dev, int order); +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { @@ -134,6 +130,7 @@ static void reg_mr_callback(int status, void *context) return; } + mr->mmkey.type = MLX5_MKEY_MR; spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); @@ -153,6 +150,9 @@ static void reg_mr_callback(int status, void *context) if (err) pr_err("Error inserting to mkey tree. 
0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); + + if (!completion_done(&ent->compl)) + complete(&ent->compl); } static int add_keys(struct mlx5_ib_dev *dev, int c, int num) @@ -161,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) struct mlx5_cache_ent *ent = &cache->ent[c]; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; - int npages = 1 << ent->order; void *mkc; u32 *in; int err = 0; @@ -189,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode, ent->access_mode); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); - MLX5_SET(mkc, mkc, log_page_size, 12); + MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, log_page_size, ent->page); spin_lock_irq(&ent->lock); ent->pending++; @@ -451,6 +450,42 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + struct mlx5_ib_mr *mr; + int err; + + if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); + return ERR_PTR(-EINVAL); + } + + ent = &cache->ent[entry]; + while (1) { + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + + err = add_keys(dev, entry, 1); + if (err && err != -EAGAIN) + return ERR_PTR(err); + + wait_for_completion(&ent->compl); + } else { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, + list); + list_del(&mr->list); + ent->cur--; + spin_unlock_irq(&ent->lock); + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); + return mr; + } + } +} + static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{ struct mlx5_mr_cache *cache = &dev->cache; @@ -460,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) int i; c = order2idx(dev, order); - if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + if (c < 0 || c > MAX_UMR_CACHE_ENTRY) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } - for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = c; i <= MAX_UMR_CACHE_ENTRY; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); @@ -492,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) return mr; } -static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; @@ -504,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); return; } + + if (unreg_umr(dev, mr)) + return; + ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); @@ -606,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; - int limit; int err; int i; @@ -619,25 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - INIT_LIST_HEAD(&cache->ent[i].head); - spin_lock_init(&cache->ent[i].lock); - ent = &cache->ent[i]; INIT_LIST_HEAD(&ent->head); spin_lock_init(&ent->lock); ent->order = i + 2; ent->dev = dev; + ent->limit = 0; - if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - (mlx5_core_is_pf(dev->mdev))) - limit = dev->mdev->profile->mr_cache[i].limit; - else - limit = 0; - + init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, 
delayed_cache_work_func); - ent->limit = limit; queue_work(cache->wq, &ent->work); + + if (i > MAX_UMR_CACHE_ENTRY) { + mlx5_odp_init_mr_cache_entry(ent); + continue; + } + + if (!use_umr(dev, ent->order)) + continue; + + ent->page = PAGE_SHIFT; + ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / + MLX5_IB_UMR_OCTOWORD; + ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + mlx5_core_is_pf(dev->mdev)) + ent->limit = dev->mdev->profile->mr_cache[i].limit; + else + ent->limit = 0; } err = mlx5_mr_cache_debugfs_init(dev); @@ -732,6 +780,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) goto err_in; kfree(in); + mr->mmkey.type = MLX5_MKEY_MR; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; @@ -757,94 +806,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int use_umr(int order) +static int use_umr(struct mlx5_ib_dev *dev, int order) { + if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + return order <= MAX_UMR_CACHE_ENTRY + 2; return order <= MLX5_MAX_UMR_SHIFT; } -static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int npages, int page_shift, int *size, - __be64 **mr_pas, dma_addr_t *dma) -{ - __be64 *pas; - struct device *ddev = dev->ib_dev.dma_device; - - /* - * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the pas array, we allocate - * a little more. - */ - *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); - *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); - if (!(*mr_pas)) - return -ENOMEM; - - pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); - mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); - /* Clear padding after the actual pages. 
*/ - memset(pas + npages, 0, *size - npages * sizeof(u64)); - - *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, *dma)) { - kfree(*mr_pas); - return -ENOMEM; - } - - return 0; -} - -static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, - struct ib_sge *sg, u64 dma, int n, u32 key, - int page_shift) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr *umrwr = umr_wr(wr); - - sg->addr = dma; - sg->length = ALIGN(sizeof(u64) * n, 64); - sg->lkey = dev->umrc.pd->local_dma_lkey; - - wr->next = NULL; - wr->sg_list = sg; - if (n) - wr->num_sge = 1; - else - wr->num_sge = 0; - - wr->opcode = MLX5_IB_WR_UMR; - - umrwr->npages = n; - umrwr->page_shift = page_shift; - umrwr->mkey = key; -} - -static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, - struct ib_sge *sg, u64 dma, int n, u32 key, - int page_shift, u64 virt_addr, u64 len, - int access_flags) -{ - struct mlx5_umr_wr *umrwr = umr_wr(wr); - - prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); - - wr->send_flags = 0; - - umrwr->target.virt_addr = virt_addr; - umrwr->length = len; - umrwr->access_flags = access_flags; - umrwr->pd = pd; -} - -static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, - struct ib_send_wr *wr, u32 key) -{ - struct mlx5_umr_wr *umrwr = umr_wr(wr); - - wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr->opcode = MLX5_IB_WR_UMR; - umrwr->mkey = key; -} - static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int access_flags, struct ib_umem **umem, int *npages, int *page_shift, int *ncont, @@ -891,21 +859,39 @@ static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) init_completion(&context->done); } +static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, + struct mlx5_umr_wr *umrwr) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_send_wr *bad; + int err; + struct mlx5_ib_umr_context umr_context; + + 
mlx5_ib_init_umr_context(&umr_context); + umrwr->wr.wr_cqe = &umr_context.cqe; + + down(&umrc->sem); + err = ib_post_send(umrc->qp, &umrwr->wr, &bad); + if (err) { + mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed (%u)\n", + umr_context.status); + err = -EFAULT; + } + } + up(&umrc->sem); + return err; +} + static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, u64 len, int npages, int page_shift, int order, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct device *ddev = dev->ib_dev.dma_device; - struct umr_common *umrc = &dev->umrc; - struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr = {}; - struct ib_send_wr *bad; struct mlx5_ib_mr *mr; - struct ib_sge sg; - int size; - __be64 *mr_pas; - dma_addr_t dma; int err = 0; int i; @@ -924,173 +910,180 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, - &dma); - if (err) - goto free_mr; - - mlx5_ib_init_umr_context(&umr_context); - - umrwr.wr.wr_cqe = &umr_context.cqe; - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, - page_shift, virt_addr, len, access_flags); - - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr.wr, &bad); - if (err) { - mlx5_ib_warn(dev, "post send failed, err %d\n", err); - goto unmap_dma; - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed\n"); - err = -EFAULT; - } - } - + mr->ibmr.pd = pd; + mr->umem = umem; + mr->access_flags = access_flags; + mr->desc_size = sizeof(struct mlx5_mtt); mr->mmkey.iova = virt_addr; mr->mmkey.size = len; mr->mmkey.pd = to_mpd(pd)->pdn; - mr->live = 1; - -unmap_dma: - up(&umrc->sem); - dma_unmap_single(ddev, dma, size, 
DMA_TO_DEVICE); + err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, + MLX5_IB_UPD_XLT_ENABLE); - kfree(mr_pas); - -free_mr: if (err) { - free_cached_mr(dev, mr); + mlx5_mr_cache_free(dev, mr); return ERR_PTR(err); } + mr->live = 1; + return mr; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, - int zap) +static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, + void *xlt, int page_shift, size_t size, + int flags) { struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dma_device; - struct umr_common *umrc = &dev->umrc; - struct mlx5_ib_umr_context umr_context; struct ib_umem *umem = mr->umem; + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); + return npages; + } + + npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); + + if (!(flags & MLX5_IB_UPD_XLT_ZAP)) { + __mlx5_ib_populate_pas(dev, umem, page_shift, + idx, npages, xlt, + MLX5_IB_MTT_PRESENT); + /* Clear padding after the pages + * brought from the umem. + */ + memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0, + size - npages * sizeof(struct mlx5_mtt)); + } + + return npages; +} + +#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ + MLX5_UMR_MTT_ALIGNMENT) +#define MLX5_SPARE_UMR_CHUNK 0x10000 + +int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, + int page_shift, int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = dev->ib_dev.dev.parent; + struct mlx5_ib_ucontext *uctx = NULL; int size; - __be64 *pas; + void *xlt; dma_addr_t dma; - struct ib_send_wr *bad; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; - const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); - const int page_index_mask = page_index_alignment - 1; + int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) + ? 
sizeof(struct mlx5_klm) + : sizeof(struct mlx5_mtt); + const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; + const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; size_t pages_iter = 0; - int use_emergency_buf = 0; + gfp_t gfp; /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, - * so we need to align the offset and length accordingly */ - if (start_page_index & page_index_mask) { - npages += start_page_index & page_index_mask; - start_page_index &= ~page_index_mask; + * so we need to align the offset and length accordingly + */ + if (idx & page_mask) { + npages += idx & page_mask; + idx &= ~page_mask; } - pages_to_map = ALIGN(npages, page_index_alignment); + gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; + gfp |= __GFP_ZERO | __GFP_NOWARN; - if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES) - return -EINVAL; + pages_to_map = ALIGN(npages, page_align); + size = desc_size * pages_to_map; + size = min_t(int, size, MLX5_MAX_UMR_CHUNK); + + xlt = (void *)__get_free_pages(gfp, get_order(size)); + if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { + mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n", + size, get_order(size), MLX5_SPARE_UMR_CHUNK); - size = sizeof(u64) * pages_to_map; - size = min_t(int, PAGE_SIZE, size); - /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim - * code, when we are called from an invalidation. The pas buffer must - * be 2k-aligned for Connect-IB. 
*/ - pas = (__be64 *)get_zeroed_page(GFP_ATOMIC); - if (!pas) { - mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n"); - pas = mlx5_ib_update_mtt_emergency_buffer; - size = MLX5_UMR_MTT_MIN_CHUNK_SIZE; - use_emergency_buf = 1; - mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex); - memset(pas, 0, size); + size = MLX5_SPARE_UMR_CHUNK; + xlt = (void *)__get_free_pages(gfp, get_order(size)); } - pages_iter = size / sizeof(u64); - dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); + + if (!xlt) { + uctx = to_mucontext(mr->ibmr.uobject->context); + mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); + size = PAGE_SIZE; + xlt = (void *)uctx->upd_xlt_page; + mutex_lock(&uctx->upd_xlt_page_mutex); + memset(xlt, 0, size); + } + pages_iter = size / desc_size; + dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during MTT update.\n"); + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); err = -ENOMEM; - goto free_pas; + goto free_xlt; } + sg.addr = dma; + sg.lkey = dev->umrc.pd->local_dma_lkey; + + memset(&wr, 0, sizeof(wr)); + wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr.wr.sg_list = &sg; + wr.wr.num_sge = 1; + wr.wr.opcode = MLX5_IB_WR_UMR; + + wr.pd = mr->ibmr.pd; + wr.mkey = mr->mmkey.key; + wr.length = mr->mmkey.size; + wr.virt_addr = mr->mmkey.iova; + wr.access_flags = mr->access_flags; + wr.page_shift = page_shift; + for (pages_mapped = 0; pages_mapped < pages_to_map && !err; - pages_mapped += pages_iter, start_page_index += pages_iter) { + pages_mapped += pages_iter, idx += pages_iter) { dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - - npages = min_t(size_t, - pages_iter, - ib_umem_num_pages(umem) - start_page_index); - - if (!zap) { - __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, - start_page_index, npages, 
pas, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages brought from the - * umem. */ - memset(pas + npages, 0, size - npages * sizeof(u64)); - } + npages = populate_xlt(mr, idx, pages_iter, xlt, + page_shift, size, flags); dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); - mlx5_ib_init_umr_context(&umr_context); - - memset(&wr, 0, sizeof(wr)); - wr.wr.wr_cqe = &umr_context.cqe; - - sg.addr = dma; - sg.length = ALIGN(npages * sizeof(u64), - MLX5_UMR_MTT_ALIGNMENT); - sg.lkey = dev->umrc.pd->local_dma_lkey; + sg.length = ALIGN(npages * desc_size, + MLX5_UMR_MTT_ALIGNMENT); + + if (pages_mapped + pages_iter >= pages_to_map) { + if (flags & MLX5_IB_UPD_XLT_ENABLE) + wr.wr.send_flags |= + MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || + flags & MLX5_IB_UPD_XLT_ACCESS) + wr.wr.send_flags |= + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + wr.wr.send_flags |= + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + } - wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | - MLX5_IB_SEND_UMR_UPDATE_MTT; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - wr.npages = sg.length / sizeof(u64); - wr.page_shift = PAGE_SHIFT; - wr.mkey = mr->mmkey.key; - wr.target.offset = start_page_index; + wr.offset = idx * desc_size; + wr.xlt_size = sg.length; - down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr.wr, &bad); - if (err) { - mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_err(dev, "UMR completion failed, code %d\n", - umr_context.status); - err = -EFAULT; - } - } - up(&umrc->sem); + err = mlx5_ib_post_send_wait(dev, &wr); } dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); -free_pas: - if (!use_emergency_buf) - free_page((unsigned long)pas); +free_xlt: + if (uctx) + mutex_unlock(&uctx->upd_xlt_page_mutex); else - 
mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex); + free_pages((unsigned long)xlt, get_order(size)); return err; } -#endif /* * If ibmr is NULL it will be allocated by reg_create. @@ -1122,8 +1115,9 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - mlx5_ib_populate_pas(dev, umem, page_shift, pas, - pg_cap ? MLX5_IB_MTT_PRESENT : 0); + if (!(access_flags & IB_ACCESS_ON_DEMAND)) + mlx5_ib_populate_pas(dev, umem, page_shift, pas, + pg_cap ? MLX5_IB_MTT_PRESENT : 0); /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ @@ -1153,6 +1147,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; } + mr->mmkey.type = MLX5_MKEY_MR; + mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; mr->dev = dev; mr->live = 1; @@ -1198,20 +1194,33 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (!start && length == U64_MAX) { + if (!(access_flags & IB_ACCESS_ON_DEMAND) || + !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + return &mr->ibmr; + } +#endif + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); if (err < 0) return ERR_PTR(err); - if (use_umr(order)) { + if (use_umr(dev, order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } - } else if (access_flags & IB_ACCESS_ON_DEMAND) { + } else if (access_flags & IB_ACCESS_ON_DEMAND && + !MLX5_CAP_GEN(dev->mdev, 
umr_extended_translation_offset)) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); goto error; @@ -1248,106 +1257,39 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_core_dev *mdev = dev->mdev; - struct umr_common *umrc = &dev->umrc; - struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; - struct ib_send_wr *bad; - int err; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; - mlx5_ib_init_umr_context(&umr_context); + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | + MLX5_IB_SEND_UMR_FAIL_IF_FREE; + umrwr.wr.opcode = MLX5_IB_WR_UMR; + umrwr.mkey = mr->mmkey.key; - umrwr.wr.wr_cqe = &umr_context.cqe; - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); - - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr.wr, &bad); - if (err) { - up(&umrc->sem); - mlx5_ib_dbg(dev, "err %d\n", err); - goto error; - } else { - wait_for_completion(&umr_context.done); - up(&umrc->sem); - } - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "unreg umr failed\n"); - err = -EFAULT; - goto error; - } - return 0; - -error: - return err; + return mlx5_ib_post_send_wait(dev, &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, - u64 length, int npages, int page_shift, int order, +static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, int access_flags, int flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct device *ddev = dev->ib_dev.dma_device; - struct mlx5_ib_umr_context umr_context; - struct ib_send_wr *bad; struct mlx5_umr_wr umrwr = {}; - struct ib_sge sg; - struct umr_common *umrc = &dev->umrc; - dma_addr_t dma = 0; - __be64 *mr_pas = NULL; - int size; int err; - mlx5_ib_init_umr_context(&umr_context); - - umrwr.wr.wr_cqe = &umr_context.cqe; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; - if (flags & IB_MR_REREG_TRANS) { - err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, 
&size, - &mr_pas, &dma); - if (err) - return err; - - umrwr.target.virt_addr = virt_addr; - umrwr.length = length; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } - - prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, - page_shift); + umrwr.wr.opcode = MLX5_IB_WR_UMR; + umrwr.mkey = mr->mmkey.key; - if (flags & IB_MR_REREG_PD) { + if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { umrwr.pd = pd; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; - } - - if (flags & IB_MR_REREG_ACCESS) { umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; } - /* post send request to UMR QP */ - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr.wr, &bad); - - if (err) { - mlx5_ib_warn(dev, "post send failed, err %d\n", err); - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed (%u)\n", - umr_context.status); - err = -EFAULT; - } - } + err = mlx5_ib_post_send_wait(dev, &umrwr); - up(&umrc->sem); - if (flags & IB_MR_REREG_TRANS) { - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); - kfree(mr_pas); - } return err; } @@ -1364,6 +1306,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; int page_shift = 0; + int upd_flags = 0; int npages = 0; int ncont = 0; int order = 0; @@ -1372,6 +1315,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + if (flags != IB_MR_REREG_PD) { /* * Replace umem. 
This needs to be done whether or not UMR is @@ -1382,7 +1327,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err < 0) { - mr->umem = NULL; + clean_mr(mr); return err; } } @@ -1414,32 +1359,37 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * Send a UMR WQE */ - err = rereg_umr(pd, mr, addr, len, npages, page_shift, - order, access_flags, flags); + mr->ibmr.pd = pd; + mr->access_flags = access_flags; + mr->mmkey.iova = addr; + mr->mmkey.size = len; + mr->mmkey.pd = to_mpd(pd)->pdn; + + if (flags & IB_MR_REREG_TRANS) { + upd_flags = MLX5_IB_UPD_XLT_ADDR; + if (flags & IB_MR_REREG_PD) + upd_flags |= MLX5_IB_UPD_XLT_PD; + if (flags & IB_MR_REREG_ACCESS) + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; + err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, + upd_flags); + } else { + err = rereg_umr(pd, mr, access_flags, flags); + } + if (err) { mlx5_ib_warn(dev, "Failed to rereg UMR\n"); + ib_umem_release(mr->umem); + clean_mr(mr); return err; } } - if (flags & IB_MR_REREG_PD) { - ib_mr->pd = pd; - mr->mmkey.pd = to_mpd(pd)->pdn; - } + set_mr_fileds(dev, mr, npages, len, access_flags); - if (flags & IB_MR_REREG_ACCESS) - mr->access_flags = access_flags; - - if (flags & IB_MR_REREG_TRANS) { - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); - set_mr_fileds(dev, mr, npages, len, access_flags); - mr->mmkey.iova = addr; - mr->mmkey.size = len; - } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); #endif - return 0; } @@ -1461,9 +1411,9 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dma_device, mr->descs, + mr->desc_map = dma_map_single(device->dev.parent, mr->descs, size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dma_device, mr->desc_map)) { + if (dma_mapping_error(device->dev.parent, mr->desc_map)) { ret = -ENOMEM; 
goto err; } @@ -1482,7 +1432,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; - dma_unmap_single(device->dma_device, mr->desc_map, + dma_unmap_single(device->dev.parent, mr->desc_map, size, DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; @@ -1518,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) return err; } } else { - err = unreg_umr(dev, mr); - if (err) { - mlx5_ib_warn(dev, "failed unregister\n"); - return err; - } - free_cached_mr(dev, mr); + mlx5_mr_cache_free(dev, mr); } if (!umred) @@ -1546,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - mlx5_ib_invalidate_range(umem, ib_umem_start(umem), - ib_umem_end(umem)); + if (umem->odp_data->page_list) + mlx5_ib_invalidate_range(umem, ib_umem_start(umem), + ib_umem_end(umem)); + else + mlx5_ib_free_implicit_mr(mr); /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in @@ -1603,11 +1551,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(u64)); + ndescs, sizeof(struct mlx5_mtt)); if (err) goto err_free_in; - mr->desc_size = sizeof(u64); + mr->desc_size = sizeof(struct mlx5_mtt); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; @@ -1656,6 +1604,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_destroy_psv; + mr->mmkey.type = MLX5_MKEY_MR; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; @@ -1736,6 +1685,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, if (err) goto free; + mw->mmkey.type = MLX5_MKEY_MW; mw->ibmw.rkey = mw->mmkey.key; resp.response_length = 
min(offsetof(typeof(resp), response_length) + |