Diffstat (limited to 'drivers/infiniband')
53 files changed, 1918 insertions, 886 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6b3f4384e46a..1ee87c3aaeab 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4805,8 +4805,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
 	event->param.ud.qkey = id_priv->qkey;
 
 out:
-	if (ndev)
-		dev_put(ndev);
+	dev_put(ndev);
 }
 
 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 2c95e6f3d47a..ea81b2497511 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,6 +39,7 @@
 #ifndef __BNXT_RE_H__
 #define __BNXT_RE_H__
+#include <rdma/uverbs_ioctl.h>
 #include "hw_counters.h"
 #define ROCE_DRV_MODULE_NAME		"bnxt_re"
@@ -179,10 +180,14 @@ struct bnxt_re_dev {
 #define BNXT_RE_ROCEV2_IPV4_PACKET	2
 #define BNXT_RE_ROCEV2_IPV6_PACKET	3
 
+#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
+
 static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
 {
 	if (rdev)
 		return &rdev->ibdev.dev;
 	return NULL;
 }
+
+extern const struct uapi_definition bnxt_re_uapi_defs[];
 #endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 952811c40c54..abef0b8baa7c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,15 @@
 #include "bnxt_re.h"
 #include "ib_verbs.h"
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+
+#include <rdma/ib_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME bnxt_re
+#include <rdma/uverbs_named_ioctl.h>
+
 #include <rdma/bnxt_re-abi.h>
 
 static int __from_ib_access_flags(int iflags)
@@ -534,12 +543,57 @@ fail:
 	return rc;
 }
 
+static struct bnxt_re_user_mmap_entry*
+bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
+			  enum bnxt_re_mmap_flag mmap_flag, u64 *offset)
+{
+	struct bnxt_re_user_mmap_entry *entry;
+	int ret;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+
+	entry->mem_offset = mem_offset;
+	entry->mmap_flag = mmap_flag;
+	entry->uctx = uctx;
+
+	switch (mmap_flag) {
+	case BNXT_RE_MMAP_SH_PAGE:
+		ret = rdma_user_mmap_entry_insert_exact(&uctx->ib_uctx,
+							&entry->rdma_entry, PAGE_SIZE, 0);
+		break;
+	case BNXT_RE_MMAP_UC_DB:
+	case BNXT_RE_MMAP_WC_DB:
+		ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
+						  &entry->rdma_entry, PAGE_SIZE);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret) {
+		kfree(entry);
+		return NULL;
+	}
+	if (offset)
+		*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+	return entry;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
 {
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
 
+	if (udata) {
+		rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+		pd->pd_db_mmap = NULL;
+	}
+
 	bnxt_re_destroy_fence_mr(pd);
 
 	if (pd->qplib_pd.id) {
@@ -558,7 +612,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context(
 		udata, struct bnxt_re_ucontext, ib_uctx);
 	struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
-	int rc;
+	struct bnxt_re_user_mmap_entry *entry = NULL;
+	int rc = 0;
 
 	pd->rdev = rdev;
 	if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
@@ -568,15 +623,15 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	}
 
 	if (udata) {
-		struct
bnxt_re_pd_resp resp; + struct bnxt_re_pd_resp resp = {}; if (!ucntx->dpi.dbr) { /* Allocate DPI in alloc_pd to avoid failing of * ibv_devinfo and family of application when DPIs * are depleted. */ - if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl, - &ucntx->dpi, ucntx)) { + if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, + &ucntx->dpi, ucntx, BNXT_QPLIB_DPI_TYPE_UC)) { rc = -ENOMEM; goto dbfail; } @@ -585,12 +640,21 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) resp.pdid = pd->qplib_pd.id; /* Still allow mapping this DBR to the new user PD. */ resp.dpi = ucntx->dpi.dpi; - resp.dbr = (u64)ucntx->dpi.umdbr; - rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + entry = bnxt_re_mmap_entry_insert(ucntx, (u64)ucntx->dpi.umdbr, + BNXT_RE_MMAP_UC_DB, &resp.dbr); + + if (!entry) { + rc = -ENOMEM; + goto dbfail; + } + + pd->pd_db_mmap = &entry->rdma_entry; + + rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (rc) { - ibdev_err(&rdev->ibdev, - "Failed to copy user response\n"); + rdma_user_mmap_entry_remove(pd->pd_db_mmap); + rc = -EFAULT; goto dbfail; } } @@ -614,12 +678,20 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; + bool block = true; + int rc = 0; - bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + block = !(flags & RDMA_DESTROY_AH_SLEEPABLE); + rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block); + if (BNXT_RE_CHECK_RC(rc)) { + if (rc == -ETIMEDOUT) + rc = 0; + else + goto fail; + } atomic_dec(&rdev->ah_count); - - return 0; +fail: + return rc; } static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -3955,6 +4027,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) container_of(ctx, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_re_user_mmap_entry *entry; struct bnxt_re_uctx_resp resp = {}; u32 chip_met_rev_num = 0; int rc; @@ -3993,6 +4066,16 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE; resp.mode = rdev->chip_ctx->modes.wqe_mode; + if (rdev->chip_ctx->modes.db_push) + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED; + + entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL); + if (!entry) { + rc = -ENOMEM; + goto cfail; + } + uctx->shpage_mmap = &entry->rdma_entry; + rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { ibdev_err(ibdev, "Failed to copy user context"); @@ -4016,6 +4099,8 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) struct bnxt_re_dev *rdev = uctx->rdev; + rdma_user_mmap_entry_remove(uctx->shpage_mmap); + uctx->shpage_mmap = NULL; if (uctx->shpg) free_page((unsigned long)uctx->shpg); @@ -4023,8 +4108,7 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) /* Free DPI only if this is the first PD allocated by the * application and mark the context dpi as NULL */ - bnxt_qplib_dealloc_dpi(&rdev->qplib_res, - &rdev->qplib_res.dpi_tbl, &uctx->dpi); + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi); uctx->dpi.dbr = NULL; } } @@ -4035,27 +4119,177 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) struct bnxt_re_ucontext *uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); - struct bnxt_re_dev *rdev = 
uctx->rdev; + struct bnxt_re_user_mmap_entry *bnxt_entry; + struct rdma_user_mmap_entry *rdma_entry; + int ret = 0; u64 pfn; - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + rdma_entry = rdma_user_mmap_entry_get(&uctx->ib_uctx, vma); + if (!rdma_entry) return -EINVAL; - if (vma->vm_pgoff) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - PAGE_SIZE, vma->vm_page_prot)) { - ibdev_err(&rdev->ibdev, "Failed to map DPI"); - return -EAGAIN; - } - } else { - pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT; - if (remap_pfn_range(vma, vma->vm_start, - pfn, PAGE_SIZE, vma->vm_page_prot)) { - ibdev_err(&rdev->ibdev, "Failed to map shared page"); - return -EAGAIN; + bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry, + rdma_entry); + + switch (bnxt_entry->mmap_flag) { + case BNXT_RE_MMAP_WC_DB: + pfn = bnxt_entry->mem_offset >> PAGE_SHIFT; + ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); + break; + case BNXT_RE_MMAP_UC_DB: + pfn = bnxt_entry->mem_offset >> PAGE_SHIFT; + ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); + break; + case BNXT_RE_MMAP_SH_PAGE: + ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg)); + break; + default: + ret = -EINVAL; + break; + } + + rdma_user_mmap_entry_put(rdma_entry); + return ret; +} + +void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct bnxt_re_user_mmap_entry *bnxt_entry; + + bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry, + rdma_entry); + + kfree(bnxt_entry); +} + +static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE); + enum bnxt_re_alloc_page_type alloc_type; + struct bnxt_re_user_mmap_entry *entry; + enum bnxt_re_mmap_flag mmap_flag; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_re_ucontext *uctx; + struct bnxt_re_dev *rdev; + u64 mmap_offset; + u32 length; + u32 dpi; + u64 dbr; + int err; + + uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); + if (IS_ERR(uctx)) + return PTR_ERR(uctx); + + err = uverbs_get_const(&alloc_type, attrs, BNXT_RE_ALLOC_PAGE_TYPE); + if (err) + return err; + + rdev = uctx->rdev; + cctx = rdev->chip_ctx; + + switch (alloc_type) { + case BNXT_RE_ALLOC_WC_PAGE: + if (cctx->modes.db_push) { + if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, &uctx->wcdpi, + uctx, BNXT_QPLIB_DPI_TYPE_WC)) + return -ENOMEM; + length = PAGE_SIZE; + dpi = uctx->wcdpi.dpi; + dbr = (u64)uctx->wcdpi.umdbr; + mmap_flag = BNXT_RE_MMAP_WC_DB; + } else { + return -EINVAL; } + + break; + + default: + return -EOPNOTSUPP; } + entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset); + if (!entry) + return -ENOMEM; + + uobj->object = entry; + uverbs_finalize_uobj_create(attrs, BNXT_RE_ALLOC_PAGE_HANDLE); + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI, + &dpi, sizeof(length)); + if (err) + return err; + return 0; } + +static int alloc_page_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_user_mmap_entry *entry = 
uobject->object; + struct bnxt_re_ucontext *uctx = entry->uctx; + + switch (entry->mmap_flag) { + case BNXT_RE_MMAP_WC_DB: + if (uctx && uctx->wcdpi.dbr) { + struct bnxt_re_dev *rdev = uctx->rdev; + + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi); + uctx->wcdpi.dbr = NULL; + } + break; + default: + goto exit; + } + rdma_user_mmap_entry_remove(&entry->rdma_entry); +exit: + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_ALLOC_PAGE, + UVERBS_ATTR_IDR(BNXT_RE_ALLOC_PAGE_HANDLE, + BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(BNXT_RE_ALLOC_PAGE_TYPE, + enum bnxt_re_alloc_page_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_DPI, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_DESTROY_PAGE, + UVERBS_ATTR_IDR(BNXT_RE_DESTROY_PAGE_HANDLE, + BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_TYPE_ALLOC_IDR(alloc_page_obj_cleanup), + &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE), + &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE)); + +const struct uapi_definition bnxt_re_uapi_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), + {} +}; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31f7e34040f7..32d9e9d09791 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -60,6 +60,8 @@ struct bnxt_re_pd { struct bnxt_re_dev *rdev; struct bnxt_qplib_pd qplib_pd; struct bnxt_re_fence_data fence; + struct rdma_user_mmap_entry *pd_db_mmap; + struct rdma_user_mmap_entry *pd_wcdb_mmap; }; struct bnxt_re_ah { @@ -134,8 +136,23 @@ struct bnxt_re_ucontext { struct ib_ucontext ib_uctx; struct bnxt_re_dev *rdev; struct bnxt_qplib_dpi dpi; + struct bnxt_qplib_dpi wcdpi; void *shpg; spinlock_t sh_lock; /* protect shpg */ + struct rdma_user_mmap_entry *shpage_mmap; +}; + +enum bnxt_re_mmap_flag { + BNXT_RE_MMAP_SH_PAGE, + BNXT_RE_MMAP_UC_DB, + BNXT_RE_MMAP_WC_DB, +}; + +struct bnxt_re_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + struct bnxt_re_ucontext *uctx; + u64 mem_offset; + u8 mmap_flag; }; static inline u16 bnxt_re_get_swqe_size(int nsge) @@ -213,6 +230,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata); void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry); + unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp); void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 3073398a2183..b42166fe7454 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -83,6 +83,45 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, unsigned long event, void *ptr); static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev); +static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); + +static int bnxt_re_hwrm_qcfg(struct 
bnxt_re_dev *rdev, u32 *db_len, + u32 *offset); +static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_en_dev *en_dev; + struct bnxt_qplib_res *res; + u32 l2db_len = 0; + u32 offset = 0; + u32 barlen; + int rc; + + res = &rdev->qplib_res; + en_dev = rdev->en_dev; + cctx = rdev->chip_ctx; + + /* Issue qcfg */ + rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset); + if (rc) + dev_info(rdev_to_dev(rdev), + "Couldn't get DB bar size, Low latency framework is disabled\n"); + /* set register offsets for both UC and WC */ + res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : + BNXT_QPLIB_DBR_PF_DB_OFFSET; + res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + + /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size + * is equal to the DB-Bar actual size. This indicates that L2 + * is mapping entire bar as UC-. RoCE driver can't enable WC mapping + * in such cases and DB-push will be disabled. + */ + barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION); + if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) { + res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; + dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n"); + } +} static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) { @@ -91,6 +130,9 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) cctx = rdev->chip_ctx; cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? mode : BNXT_QPLIB_WQE_MODE_STATIC; + if (bnxt_re_hwrm_qcaps(rdev)) + dev_err(rdev_to_dev(rdev), + "Failed to query hwrm qcaps\n"); } static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) @@ -112,6 +154,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; + int rc; en_dev = rdev->en_dev; @@ -130,6 +173,12 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); bnxt_re_set_drv_mode(rdev, wqe_mode); + + bnxt_re_set_db_offset(rdev); + rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); + if (rc) + return rc; + if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, "platform doesn't support global atomics."); @@ -283,15 +332,21 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) for (indx = 0; indx < rdev->num_msix; indx++) rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; - bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, - false); + rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, + false); + if (rc) { + ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); + return; + } for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { nq = &rdev->nq[indx - 1]; rc = bnxt_qplib_nq_start_irq(nq, indx - 1, msix_ent[indx].vector, false); - if (rc) + if (rc) { ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n", indx - 1); + return; + } } } @@ -315,12 +370,11 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) return rc; } -static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr, - u16 opcd, u16 crid, u16 trid) +static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd) { hdr->req_type = cpu_to_le16(opcd); - hdr->cmpl_ring = cpu_to_le16(crid); - hdr->target_id = cpu_to_le16(trid); + hdr->cmpl_ring = cpu_to_le16(-1); + hdr->target_id = cpu_to_le16(-1); } static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, @@ -334,13 
+388,60 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } +/* Query device config using common hwrm */ +static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, + u32 *offset) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_func_qcfg_output resp = {0}; + struct hwrm_func_qcfg_input req = {0}; + struct bnxt_fw_msg fw_msg; + int rc; + + memset(&fw_msg, 0, sizeof(fw_msg)); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); + req.fid = cpu_to_le16(0xffff); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (!rc) { + *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); + *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024); + } + return rc; +} + +/* Query function capabilities using common hwrm */ +int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_func_qcaps_output resp = {}; + struct hwrm_func_qcaps_input req = {}; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + cctx = rdev->chip_ctx; + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS); + req.fid = cpu_to_le16(0xffff); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + return rc; + cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; + + return 0; +} + static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, int type) { struct bnxt_en_dev *en_dev; - struct hwrm_ring_free_input req = {0}; + struct hwrm_ring_free_input req = {}; struct hwrm_ring_free_output resp; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; if (!rdev) @@ -354,9 +455,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; - memset(&fw_msg, 0, sizeof(fw_msg)); - - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, @@ -373,16 +472,15 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, u16 *fw_ring_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; - struct hwrm_ring_alloc_input req = {0}; + struct hwrm_ring_alloc_input req = {}; struct hwrm_ring_alloc_output resp; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; if (!en_dev) return rc; - memset(&fw_msg, 0, sizeof(fw_msg)); - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); req.enables = 0; req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); if (ring_attr->pages > 1) { @@ -411,7 +509,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_stat_ctx_free_input req = {}; struct hwrm_stat_ctx_free_output resp = {}; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; if (!en_dev) @@ -420,9 +518,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; - memset(&fw_msg, 0, sizeof(fw_msg)); - - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, 
-1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -439,10 +535,10 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, u32 *fw_stats_ctx_id) { struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; - struct hwrm_stat_ctx_alloc_output resp = {0}; - struct hwrm_stat_ctx_alloc_input req = {0}; + struct hwrm_stat_ctx_alloc_output resp = {}; + struct hwrm_stat_ctx_alloc_input req = {}; struct bnxt_en_dev *en_dev = rdev->en_dev; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; *fw_stats_ctx_id = INVALID_STATS_CTX_ID; @@ -450,9 +546,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, if (!en_dev) return rc; - memset(&fw_msg, 0, sizeof(fw_msg)); - - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); @@ -466,6 +560,10 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, return rc; } +static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ +} + /* Device */ static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) @@ -532,6 +630,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .destroy_qp = bnxt_re_destroy_qp, .destroy_srq = bnxt_re_destroy_srq, .device_group = &bnxt_re_dev_attr_group, + .disassociate_ucontext = bnxt_re_disassociate_ucontext, .get_dev_fw_str = bnxt_re_query_fw_str, .get_dma_mr = bnxt_re_get_dma_mr, .get_hw_stats = bnxt_re_ib_get_hw_stats, @@ -539,6 +638,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .get_port_immutable = bnxt_re_get_port_immutable, .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, + .mmap_free = bnxt_re_mmap_free, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, @@ -579,6 +679,9 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + ibdev->driver_def = bnxt_re_uapi_defs; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); if (ret) @@ -822,7 +925,6 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev) if (rdev->qplib_res.dpi_tbl.max) { bnxt_qplib_dealloc_dpi(&rdev->qplib_res, - &rdev->qplib_res.dpi_tbl, &rdev->dpi_privileged); } if (rdev->qplib_res.rcfw) { @@ -850,9 +952,9 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) if (rc) goto fail; - rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl, + rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged, - rdev); + rdev, BNXT_QPLIB_DPI_TYPE_KERNEL); if (rc) goto dealloc_res; @@ -892,7 +994,6 @@ free_nq: bnxt_qplib_free_nq(&rdev->nq[i]); } bnxt_qplib_dealloc_dpi(&rdev->qplib_res, - &rdev->qplib_res.dpi_tbl, &rdev->dpi_privileged); dealloc_res: bnxt_qplib_free_res(&rdev->qplib_res); @@ -963,12 +1064,6 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) if (!ib_device_try_get(&rdev->ibdev)) return 0; - if (!sgid_tbl) { - ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated"); - rc = -EINVAL; - goto out; - } - for (index = 0; index < sgid_tbl->active; index++) { gid_idx = sgid_tbl->hw_id[index]; @@ -986,7 +1081,7 
@@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, rdev->qplib_res.netdev->dev_addr); } -out: + ib_device_put(&rdev->ibdev); return rc; } @@ -1039,14 +1134,13 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; - struct hwrm_ver_get_output resp = {0}; - struct hwrm_ver_get_input req = {0}; - struct bnxt_fw_msg fw_msg; + struct hwrm_ver_get_output resp = {}; + struct hwrm_ver_get_input req = {}; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_fw_msg fw_msg = {}; int rc = 0; - memset(&fw_msg, 0, sizeof(fw_msg)); - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, - HWRM_VER_GET, -1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET); req.hwrm_intf_maj = HWRM_VERSION_MAJOR; req.hwrm_intf_min = HWRM_VERSION_MINOR; req.hwrm_intf_upd = HWRM_VERSION_UPDATE; @@ -1058,11 +1152,18 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) rc); return; } - rdev->qplib_ctx.hwrm_intf_ver = + + cctx = rdev->chip_ctx; + cctx->hwrm_intf_ver = (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | le16_to_cpu(resp.hwrm_intf_patch); + + cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout); + + if (!cctx->hwrm_cmd_max_timeout) + cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT; } static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) @@ -1200,7 +1301,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, - vid, db_offt, rdev->is_virtfn, + vid, db_offt, &bnxt_re_aeq_handler); if (rc) { ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n", @@ -1497,6 +1598,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) */ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); + wake_up_all(&rdev->rcfw.cmdq.waitq); mutex_unlock(&bnxt_re_mutex); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 8974f6235cfa..91aed77ce40d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -399,6 +399,9 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) { + if (!nq->requested) + return; + tasklet_disable(&nq->nq_tasklet); /* Mask h/w interrupt */ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false); @@ -406,11 +409,12 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) synchronize_irq(nq->msix_vec); if (kill) tasklet_kill(&nq->nq_tasklet); - if (nq->requested) { - irq_set_affinity_hint(nq->msix_vec, NULL); - free_irq(nq->msix_vec, nq); - nq->requested = false; - } + + irq_set_affinity_hint(nq->msix_vec, NULL); + free_irq(nq->msix_vec, nq); + kfree(nq->name); + nq->name = NULL; + nq->requested = false; } void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) @@ -436,6 +440,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init) { + struct bnxt_qplib_res *res = nq->res; int rc; if (nq->requested) @@ -447,10 +452,17 @@ int bnxt_qplib_nq_start_irq(struct 
bnxt_qplib_nq *nq, int nq_indx, else tasklet_enable(&nq->nq_tasklet); - snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx); + nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s", + nq_indx, pci_name(res->pdev)); + if (!nq->name) + return -ENOMEM; rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq); - if (rc) + if (rc) { + kfree(nq->name); + nq->name = NULL; + tasklet_disable(&nq->nq_tasklet); return rc; + } cpumask_clear(&nq->mask); cpumask_set_cpu(nq_indx, &nq->mask); @@ -461,7 +473,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->msix_vec, nq_indx); } nq->requested = true; - bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true); return rc; } @@ -471,7 +483,6 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) resource_size_t reg_base; struct bnxt_qplib_nq_db *nq_db; struct pci_dev *pdev; - int rc = 0; pdev = nq->pdev; nq_db = &nq->nq_db; @@ -481,8 +492,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) if (!nq_db->reg.bar_base) { dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!", nq_db->reg.bar_id); - rc = -ENOMEM; - goto fail; + return -ENOMEM; } reg_base = nq_db->reg.bar_base + reg_offt; @@ -492,15 +502,14 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) if (!nq_db->reg.bar_reg) { dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed", nq_db->reg.bar_id); - rc = -ENOMEM; - goto fail; + return -ENOMEM; } nq_db->dbinfo.db = nq_db->reg.bar_reg; nq_db->dbinfo.hwq = &nq->hwq; nq_db->dbinfo.xid = nq->ring_id; -fail: - return rc; + + return 0; } int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, @@ -614,7 +623,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), GFP_KERNEL); @@ -659,7 +668,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.xid = srq->id; srq->dbinfo.db = srq->dpi->dbr; srq->dbinfo.max_slot = 1; - srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; + srq->dbinfo.priv_db = res->dpi_tbl.priv_db; if (srq->threshold) bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; @@ -668,7 +677,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, fail: bnxt_qplib_free_hwq(res, &srq->hwq); kfree(srq->swq); -exit: + return rc; } @@ -732,15 +741,14 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct rq_wqe *srqe; struct sq_sge *hw_sge; u32 sw_prod, sw_cons, count = 0; - int i, rc = 0, next; + int i, next; spin_lock(&srq_hwq->lock); if (srq->start_idx == srq->last_idx) { dev_err(&srq_hwq->pdev->dev, "FP: SRQ (0x%x) is full!\n", srq->id); - rc = -EINVAL; spin_unlock(&srq_hwq->lock); - goto done; + return -EINVAL; } next = srq->start_idx; srq->start_idx = srq->swq[next].next_idx; @@ -781,22 +789,19 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq->arm_req = false; bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); } -done: - return rc; + + return 0; } /* QP */ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) { - int rc = 0; int indx; que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); - if (!que->swq) { - rc = -ENOMEM; - goto out; - } + if (!que->swq) + return -ENOMEM; que->swq_start = 0; que->swq_last = que->max_wqe - 1; @@ -804,8 +809,8 @@ static int 
bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) que->swq[indx].next_idx = indx + 1; que->swq[que->swq_last].next_idx = 0; /* Make it circular */ que->swq_last = 0; -out: - return rc; + + return 0; } int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -839,7 +844,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; rc = bnxt_qplib_alloc_init_swq(sq); if (rc) @@ -927,7 +932,6 @@ sq_swq: kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); -exit: return rc; } @@ -992,7 +996,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; rc = bnxt_qplib_alloc_init_swq(sq); if (rc) @@ -1140,7 +1144,6 @@ sq_swq: kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); -exit: return rc; } @@ -1614,7 +1617,7 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, il_src = (void *)wqe->sg_list[indx].addr; t_len += len; if (t_len > qp->max_inline_data) - goto bad; + return -ENOMEM; while (len) { if (pull_dst) { pull_dst = false; @@ -1638,8 +1641,6 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, } return t_len; -bad: - return -ENOMEM; } static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, @@ -2069,7 +2070,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) hwq_attr.sginfo = &cq->sg_info; rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_CQ, @@ -2104,7 +2105,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) cq->dbinfo.hwq = &cq->hwq; cq->dbinfo.xid = cq->id; cq->dbinfo.db = cq->dpi->dbr; - cq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; + cq->dbinfo.priv_db = res->dpi_tbl.priv_db; bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); @@ -2112,7 +2113,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) fail: bnxt_qplib_free_hwq(res, &cq->hwq); -exit: return rc; } @@ -2505,7 +2505,6 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *rq; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); @@ -2516,7 +2515,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); - goto done; + return 0; } cqe = *pcqe; @@ -2572,8 +2571,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, } } -done: - return rc; + return 0; } static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, @@ -2586,7 +2584,6 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *rq; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); @@ -2597,7 +2594,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); - goto done; + return 0; } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; @@ -2659,8 +2656,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, 
bnxt_qplib_add_flush_qp(qp); } } -done: - return rc; + + return 0; } bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) @@ -2687,7 +2684,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); @@ -2698,7 +2694,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, if (qp->rq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); - goto done; + return 0; } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; @@ -2767,8 +2763,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, } } -done: - return rc; + return 0; } static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, @@ -2790,11 +2785,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); - if (!qp) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process terminal qp is NULL\n"); + if (!qp) return -EINVAL; - } /* Must block new posting of SQ and RQ */ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index d74d5ead2e32..a42820821c47 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -472,7 +472,7 @@ typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_nq { struct pci_dev *pdev; struct bnxt_qplib_res *res; - char name[32]; + char *name; struct bnxt_qplib_hwq hwq; struct bnxt_qplib_nq_db nq_db; u16 ring_id; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index de9069103177..b30e66b64827 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -53,112 +53,289 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t); -/* Hardware communication channel */ +/** + * bnxt_qplib_map_rc - map return type based on opcode + * @opcode - roce slow path opcode + * + * case #1 + * Firmware initiated error recovery is a safe state machine and + * driver can consider all the underlying rdma resources are free. + * In this state, it is safe to return success for opcodes related to + * destroying rdma resources (like destroy qp, destroy cq etc.). + * + * case #2 + * If driver detect potential firmware stall, it is not safe state machine + * and the driver can not consider all the underlying rdma resources are + * freed. + * In this state, it is not safe to return success for opcodes related to + * destroying rdma resources (like destroy qp, destroy cq etc.). + * + * Scope of this helper function is only for case #1. + * + * Returns: + * 0 to communicate success to caller. + * Non zero error code to communicate failure to caller. 
+ */ +static int bnxt_qplib_map_rc(u8 opcode) +{ + switch (opcode) { + case CMDQ_BASE_OPCODE_DESTROY_QP: + case CMDQ_BASE_OPCODE_DESTROY_SRQ: + case CMDQ_BASE_OPCODE_DESTROY_CQ: + case CMDQ_BASE_OPCODE_DEALLOCATE_KEY: + case CMDQ_BASE_OPCODE_DEREGISTER_MR: + case CMDQ_BASE_OPCODE_DELETE_GID: + case CMDQ_BASE_OPCODE_DESTROY_QP1: + case CMDQ_BASE_OPCODE_DESTROY_AH: + case CMDQ_BASE_OPCODE_DEINITIALIZE_FW: + case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC: + case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE: + return 0; + default: + return -ETIMEDOUT; + } +} + +/** + * bnxt_re_is_fw_stalled - Check firmware health + * @rcfw - rcfw channel instance of rdev + * @cookie - cookie to track the command + * + * If firmware has not responded any rcfw command within + * rcfw->max_timeout, consider firmware as stalled. + * + * Returns: + * 0 if firmware is responding + * -ENODEV if firmware is not responding + */ +static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw, + u16 cookie) +{ + struct bnxt_qplib_cmdq_ctx *cmdq; + struct bnxt_qplib_crsqe *crsqe; + + crsqe = &rcfw->crsqe_tbl[cookie]; + cmdq = &rcfw->cmdq; + + if (time_after(jiffies, cmdq->last_seen + + (rcfw->max_timeout * HZ))) { + dev_warn_ratelimited(&rcfw->pdev->dev, + "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ", + __func__, cookie, crsqe->opcode, + jiffies_to_msecs(jiffies - cmdq->last_seen), + rcfw->max_timeout * 1000, + crsqe->is_in_used); + return -ENODEV; + } + + return 0; +} + +/** + * __wait_for_resp - Don't hold the cpu context and wait for response + * @rcfw - rcfw channel instance of rdev + * @cookie - cookie to track the command + * + * Wait for command completion in sleepable context. + * + * Returns: + * 0 if command is completed by firmware. + * Non zero error code for rest of the case. + */ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) { struct bnxt_qplib_cmdq_ctx *cmdq; - u16 cbit; - int rc; + struct bnxt_qplib_crsqe *crsqe; + int ret; cmdq = &rcfw->cmdq; - cbit = cookie % rcfw->cmdq_depth; - rc = wait_event_timeout(cmdq->waitq, - !test_bit(cbit, cmdq->cmdq_bitmap), - msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); - return rc ? 0 : -ETIMEDOUT; + crsqe = &rcfw->crsqe_tbl[cookie]; + + do { + if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags)) + return bnxt_qplib_map_rc(crsqe->opcode); + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) + return -ETIMEDOUT; + + wait_event_timeout(cmdq->waitq, + !crsqe->is_in_used || + test_bit(ERR_DEVICE_DETACHED, &cmdq->flags), + msecs_to_jiffies(rcfw->max_timeout * 1000)); + + if (!crsqe->is_in_used) + return 0; + + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); + + if (!crsqe->is_in_used) + return 0; + + ret = bnxt_re_is_fw_stalled(rcfw, cookie); + if (ret) + return ret; + + } while (true); }; +/** + * __block_for_resp - hold the cpu context and wait for response + * @rcfw - rcfw channel instance of rdev + * @cookie - cookie to track the command + * + * This function will hold the cpu (non-sleepable context) and + * wait for command completion. Maximum holding interval is 8 second. + * + * Returns: + * -ETIMEOUT if command is not completed in specific time interval. + * 0 if command is completed by firmware. 
+ */ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) { - u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; - struct bnxt_qplib_cmdq_ctx *cmdq; - u16 cbit; + struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; + struct bnxt_qplib_crsqe *crsqe; + unsigned long issue_time = 0; + + issue_time = jiffies; + crsqe = &rcfw->crsqe_tbl[cookie]; - cmdq = &rcfw->cmdq; - cbit = cookie % rcfw->cmdq_depth; - if (!test_bit(cbit, cmdq->cmdq_bitmap)) - goto done; do { + if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags)) + return bnxt_qplib_map_rc(crsqe->opcode); + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) + return -ETIMEDOUT; + udelay(1); + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); - } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); -done: - return count ? 0 : -ETIMEDOUT; + if (!crsqe->is_in_used) + return 0; + + } while (time_before(jiffies, issue_time + (8 * HZ))); + + return -ETIMEDOUT; }; -static int __send_message(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_cmdqmsg *msg) +/* __send_message_no_waiter - get cookie and post the message. + * @rcfw - rcfw channel instance of rdev + * @msg - qplib message internal + * + * This function will just post and don't bother about completion. + * Current design of this function is - + * user must hold the completion queue hwq->lock. + * user must have used existing completion and free the resources. + * this function will not check queue full condition. + * this function will explicitly set is_waiter_alive=false. + * current use case is - send destroy_ah if create_ah is return + * after waiter of create_ah is lost. It can be extended for other + * use case as well. + * + * Returns: Nothing + * + */ +static void __send_message_no_waiter(struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_cmdqmsg *msg) { struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; struct bnxt_qplib_hwq *hwq = &cmdq->hwq; struct bnxt_qplib_crsqe *crsqe; struct bnxt_qplib_cmdqe *cmdqe; u32 sw_prod, cmdq_prod; - struct pci_dev *pdev; - unsigned long flags; - u32 bsize, opcode; - u16 cookie, cbit; + u16 cookie; + u32 bsize; u8 *preq; - pdev = rcfw->pdev; + cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE; + __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie)); + crsqe = &rcfw->crsqe_tbl[cookie]; - opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); - if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && - (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC && - opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && - opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { - dev_err(&pdev->dev, - "RCFW not initialized, reject opcode 0x%x\n", opcode); - return -EINVAL; - } + /* Set cmd_size in terms of 16B slots in req. 
*/ + bsize = bnxt_qplib_set_cmd_slots(msg->req); + /* GET_CMD_SIZE would return number of slots in either case of tlv + * and non-tlv commands after call to bnxt_qplib_set_cmd_slots() + */ + crsqe->is_internal_cmd = true; + crsqe->is_waiter_alive = false; + crsqe->is_in_used = true; + crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz); - if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && - opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { - dev_err(&pdev->dev, "RCFW already initialized!\n"); - return -EINVAL; - } + preq = (u8 *)msg->req; + do { + /* Locate the next cmdq slot */ + sw_prod = HWQ_CMP(hwq->prod, hwq); + cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); + /* Copy a segment of the req cmd to the cmdq */ + memset(cmdqe, 0, sizeof(*cmdqe)); + memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe))); + preq += min_t(u32, bsize, sizeof(*cmdqe)); + bsize -= min_t(u32, bsize, sizeof(*cmdqe)); + hwq->prod++; + } while (bsize > 0); + cmdq->seq_num++; - if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags)) - return -ETIMEDOUT; + cmdq_prod = hwq->prod; + atomic_inc(&rcfw->timeout_send); + /* ring CMDQ DB */ + wmb(); + writel(cmdq_prod, cmdq->cmdq_mbox.prod); + writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db); +} + +static int __send_message(struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_cmdqmsg *msg, u8 opcode) +{ + u32 bsize, free_slots, required_slots; + struct bnxt_qplib_cmdq_ctx *cmdq; + struct bnxt_qplib_crsqe *crsqe; + struct bnxt_qplib_cmdqe *cmdqe; + struct bnxt_qplib_hwq *hwq; + u32 sw_prod, cmdq_prod; + struct pci_dev *pdev; + unsigned long flags; + u16 cookie; + u8 *preq; + + cmdq = &rcfw->cmdq; + hwq = &cmdq->hwq; + pdev = rcfw->pdev; /* Cmdq are in 16-byte units, each request can consume 1 or more * cmdqe */ spin_lock_irqsave(&hwq->lock, flags); - if (msg->req->cmd_size >= HWQ_FREE_SLOTS(hwq)) { - dev_err(&pdev->dev, "RCFW: CMDQ is full!\n"); + required_slots = bnxt_qplib_get_cmd_slots(msg->req); + free_slots = HWQ_FREE_SLOTS(hwq); + cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE; + crsqe = &rcfw->crsqe_tbl[cookie]; + + if (required_slots >= free_slots) { + dev_info_ratelimited(&pdev->dev, + "CMDQ is full req/free %d/%d!", + required_slots, free_slots); spin_unlock_irqrestore(&hwq->lock, flags); return -EAGAIN; } - - - cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE; - cbit = cookie % rcfw->cmdq_depth; if (msg->block) cookie |= RCFW_CMD_IS_BLOCKING; - - set_bit(cbit, cmdq->cmdq_bitmap); __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie)); - crsqe = &rcfw->crsqe_tbl[cbit]; - if (crsqe->resp) { - spin_unlock_irqrestore(&hwq->lock, flags); - return -EBUSY; - } - /* change the cmd_size to the number of 16byte cmdq unit. 
- * req->cmd_size is modified here - */ bsize = bnxt_qplib_set_cmd_slots(msg->req); - - memset(msg->resp, 0, sizeof(*msg->resp)); + crsqe->free_slots = free_slots; crsqe->resp = (struct creq_qp_event *)msg->resp; crsqe->resp->cookie = cpu_to_le16(cookie); + crsqe->is_internal_cmd = false; + crsqe->is_waiter_alive = true; + crsqe->is_in_used = true; + crsqe->opcode = opcode; + crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz); if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) { struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb; - __set_cmdq_base_resp_addr(msg->req, msg->req_sz, cpu_to_le64(sbuf->dma_addr)); + + __set_cmdq_base_resp_addr(msg->req, msg->req_sz, + cpu_to_le64(sbuf->dma_addr)); __set_cmdq_base_resp_size(msg->req, msg->req_sz, - ALIGN(sbuf->size, BNXT_QPLIB_CMDQE_UNITS)); + ALIGN(sbuf->size, + BNXT_QPLIB_CMDQE_UNITS)); } preq = (u8 *)msg->req; @@ -166,11 +343,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(hwq->prod, hwq); cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); - if (!cmdqe) { - dev_err(&pdev->dev, - "RCFW request failed with no cmdqe!\n"); - goto done; - } /* Copy a segment of the req cmd to the cmdq */ memset(cmdqe, 0, sizeof(*cmdqe)); memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe))); @@ -180,7 +352,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, } while (bsize > 0); cmdq->seq_num++; - cmdq_prod = hwq->prod; + cmdq_prod = hwq->prod & 0xFFFF; if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) { /* The very first doorbell write * is required to set this flag @@ -194,51 +366,158 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, wmb(); writel(cmdq_prod, cmdq->cmdq_mbox.prod); writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db); -done: spin_unlock_irqrestore(&hwq->lock, flags); /* Return the CREQ response pointer */ return 0; } -int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_cmdqmsg *msg) +/** + * __poll_for_resp - self poll completion for rcfw command + * @rcfw - rcfw channel instance of rdev + * @cookie - cookie to track the command + * + * It works same as __wait_for_resp except this function will + * do self polling in sort interval since interrupt is disabled. + * This function can not be called from non-sleepable context. + * + * Returns: + * -ETIMEOUT if command is not completed in specific time interval. + * 0 if command is completed by firmware. 
+ */ +static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) +{ + struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; + struct bnxt_qplib_crsqe *crsqe; + unsigned long issue_time; + int ret; + + issue_time = jiffies; + crsqe = &rcfw->crsqe_tbl[cookie]; + + do { + if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags)) + return bnxt_qplib_map_rc(crsqe->opcode); + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) + return -ETIMEDOUT; + + usleep_range(1000, 1001); + + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); + if (!crsqe->is_in_used) + return 0; + if (jiffies_to_msecs(jiffies - issue_time) > + (rcfw->max_timeout * 1000)) { + ret = bnxt_re_is_fw_stalled(rcfw, cookie); + if (ret) + return ret; + } + } while (true); +}; + +static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_cmdqmsg *msg, + u8 opcode) +{ + struct bnxt_qplib_cmdq_ctx *cmdq; + + cmdq = &rcfw->cmdq; + + /* Prevent posting if f/w is not in a state to process */ + if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags)) + return bnxt_qplib_map_rc(opcode); + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) + return -ETIMEDOUT; + + if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && + opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { + dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!"); + return -EINVAL; + } + + if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && + (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC && + opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && + opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { + dev_err(&rcfw->pdev->dev, + "QPLIB: RCFW not initialized, reject opcode 0x%x", + opcode); + return -EOPNOTSUPP; + } + + return 0; +} + +/* This function will just post and do not bother about completion */ +static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw, + struct creq_create_ah_resp *create_ah_resp) +{ + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_destroy_ah req = {}; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_DESTROY_AH, + sizeof(req)); + req.ah_cid = create_ah_resp->xid; + msg.req = (struct cmdq_base *)&req; + msg.req_sz = sizeof(req); + __send_message_no_waiter(rcfw, &msg); + dev_info_ratelimited(&rcfw->pdev->dev, + "From %s: ah_cid = %d timeout_send %d\n", + __func__, req.ah_cid, + atomic_read(&rcfw->timeout_send)); +} + +/** + * __bnxt_qplib_rcfw_send_message - qplib interface to send + * and complete rcfw command. + * @rcfw - rcfw channel instance of rdev + * @msg - qplib message internal + * + * This function does not account shadow queue depth. It will send + * all the command unconditionally as long as send queue is not full. + * + * Returns: + * 0 if command completed by firmware. + * Non zero if the command is not completed by firmware. 
+ */ +static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_cmdqmsg *msg) { struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp; + struct bnxt_qplib_crsqe *crsqe; + unsigned long flags; u16 cookie; - u8 opcode, retry_cnt = 0xFF; int rc = 0; + u8 opcode; - /* Prevent posting if f/w is not in a state to process */ - if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags)) - return 0; + opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); - do { - opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); - rc = __send_message(rcfw, msg); - cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz)) & - RCFW_MAX_COOKIE_VALUE; - if (!rc) - break; - if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) { - /* send failed */ - dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n", - cookie, opcode); - return rc; - } - msg->block ? mdelay(1) : usleep_range(500, 1000); + rc = __send_message_basic_sanity(rcfw, msg, opcode); + if (rc) + return rc; - } while (retry_cnt--); + rc = __send_message(rcfw, msg, opcode); + if (rc) + return rc; + + cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz)) + & RCFW_MAX_COOKIE_VALUE; if (msg->block) rc = __block_for_resp(rcfw, cookie); - else + else if (atomic_read(&rcfw->rcfw_intr_enabled)) rc = __wait_for_resp(rcfw, cookie); + else + rc = __poll_for_resp(rcfw, cookie); + if (rc) { - /* timed out */ - dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n", - cookie, opcode, RCFW_CMD_WAIT_TIME_MS); - set_bit(FIRMWARE_TIMED_OUT, &rcfw->cmdq.flags); - return rc; + spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags); + crsqe = &rcfw->crsqe_tbl[cookie]; + crsqe->is_waiter_alive = false; + if (rc == -ENODEV) + set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags); + spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags); + return -ETIMEDOUT; } if (evnt->status) { @@ -250,6 +529,48 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, return rc; } + +/** + * bnxt_qplib_rcfw_send_message - qplib interface to send + * and complete rcfw command. + * @rcfw - rcfw channel instance of rdev + * @msg - qplib message internal + * + * Driver interact with Firmware through rcfw channel/slow path in two ways. + * a. Blocking rcfw command send. In this path, driver cannot hold + * the context for longer period since it is holding cpu until + * command is not completed. + * b. Non-blocking rcfw command send. In this path, driver can hold the + * context for longer period. There may be many pending command waiting + * for completion because of non-blocking nature. + * + * Driver will use shadow queue depth. Current queue depth of 8K + * (due to size of rcfw message there can be actual ~4K rcfw outstanding) + * is not optimal for rcfw command processing in firmware. + * + * Restrict at max #RCFW_CMD_NON_BLOCKING_SHADOW_QD Non-Blocking rcfw commands. + * Allow all blocking commands until there is no queue full. + * + * Returns: + * 0 if command completed by firmware. + * Non zero if the command is not completed by firmware. 
+ */ +int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_cmdqmsg *msg) +{ + int ret; + + if (!msg->block) { + down(&rcfw->rcfw_inflight); + ret = __bnxt_qplib_rcfw_send_message(rcfw, msg); + up(&rcfw->rcfw_inflight); + } else { + ret = __bnxt_qplib_rcfw_send_message(rcfw, msg); + } + + return ret; +} + /* Completions */ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw, struct creq_func_event *func_event) @@ -295,19 +616,20 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw, } static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, - struct creq_qp_event *qp_event) + struct creq_qp_event *qp_event, + u32 *num_wait) { struct creq_qp_error_notification *err_event; struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq; struct bnxt_qplib_crsqe *crsqe; + u32 qp_id, tbl_indx, req_size; struct bnxt_qplib_qp *qp; - u16 cbit, blocked = 0; + u16 cookie, blocked = 0; + bool is_waiter_alive; struct pci_dev *pdev; unsigned long flags; - __le16 mcookie; - u16 cookie; + u32 wait_cmds = 0; int rc = 0; - u32 qp_id, tbl_indx; pdev = rcfw->pdev; switch (qp_event->event) { @@ -339,33 +661,60 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, spin_lock_irqsave_nested(&hwq->lock, flags, SINGLE_DEPTH_NESTING); cookie = le16_to_cpu(qp_event->cookie); - mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; - cbit = cookie % rcfw->cmdq_depth; - crsqe = &rcfw->crsqe_tbl[cbit]; - if (crsqe->resp && - crsqe->resp->cookie == mcookie) { - memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); - crsqe->resp = NULL; - } else { - if (crsqe->resp && crsqe->resp->cookie) - dev_err(&pdev->dev, - "CMD %s cookie sent=%#x, recd=%#x\n", - crsqe->resp ? "mismatch" : "collision", - crsqe->resp ? crsqe->resp->cookie : 0, - mcookie); + crsqe = &rcfw->crsqe_tbl[cookie]; + crsqe->is_in_used = false; + + if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED, + &rcfw->cmdq.flags), + "QPLIB: Unreponsive rcfw channel detected.!!")) { + dev_info(&pdev->dev, + "rcfw timedout: cookie = %#x, free_slots = %d", + cookie, crsqe->free_slots); + spin_unlock_irqrestore(&hwq->lock, flags); + return rc; } - if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap)) - dev_warn(&pdev->dev, - "CMD bit %d was not requested\n", cbit); - hwq->cons += crsqe->req_size; + + if (crsqe->is_internal_cmd && !qp_event->status) + atomic_dec(&rcfw->timeout_send); + + if (crsqe->is_waiter_alive) { + if (crsqe->resp) + memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); + if (!blocked) + wait_cmds++; + } + + req_size = crsqe->req_size; + is_waiter_alive = crsqe->is_waiter_alive; + crsqe->req_size = 0; + if (!is_waiter_alive) + crsqe->resp = NULL; - if (!blocked) - wake_up(&rcfw->cmdq.waitq); + hwq->cons += req_size; + + /* This is a case to handle below scenario - + * Create AH is completed successfully by firmware, + * but completion took more time and driver already lost + * the context of create_ah from caller. + * We have already return failure for create_ah verbs, + * so let's destroy the same address vector since it is + * no more used in stack. We don't care about completion + * in __send_message_no_waiter. + * If destroy_ah is failued by firmware, there will be AH + * resource leak and relatively not critical + unlikely + * scenario. Current design is not to handle such case. 
+ */ + if (!is_waiter_alive && !qp_event->status && + qp_event->event == CREQ_QP_EVENT_EVENT_CREATE_AH) + __destroy_timedout_ah(rcfw, + (struct creq_create_ah_resp *) + qp_event); spin_unlock_irqrestore(&hwq->lock, flags); } + *num_wait += wait_cmds; return rc; } @@ -379,6 +728,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) struct creq_base *creqe; u32 sw_cons, raw_cons; unsigned long flags; + u32 num_wakeup = 0; /* Service the CREQ until budget is over */ spin_lock_irqsave(&hwq->lock, flags); @@ -392,12 +742,14 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) * reading any further. */ dma_rmb(); + rcfw->cmdq.last_seen = jiffies; type = creqe->type & CREQ_BASE_TYPE_MASK; switch (type) { case CREQ_BASE_TYPE_QP_EVENT: bnxt_qplib_process_qp_event - (rcfw, (struct creq_qp_event *)creqe); + (rcfw, (struct creq_qp_event *)creqe, + &num_wakeup); creq->stats.creq_qp_event_processed++; break; case CREQ_BASE_TYPE_FUNC_EVENT: @@ -425,6 +777,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) rcfw->res->cctx, true); } spin_unlock_irqrestore(&hwq->lock, flags); + if (num_wakeup) + wake_up_nr(&rcfw->cmdq.waitq, num_wakeup); } static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) @@ -556,7 +910,6 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { - bitmap_free(rcfw->cmdq.cmdq_bitmap); kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq); @@ -593,13 +946,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, "HW channel CREQ allocation failed\n"); goto fail; } - if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK) - rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256; - else - rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; + + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT; sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth); - hwq_attr.depth = rcfw->cmdq_depth; + hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF; hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS; hwq_attr.type = HWQ_TYPE_CTX; if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) { @@ -613,10 +964,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!rcfw->crsqe_tbl) goto fail; - cmdq->cmdq_bitmap = bitmap_zalloc(rcfw->cmdq_depth, GFP_KERNEL); - if (!cmdq->cmdq_bitmap) - goto fail; - /* Allocate one extra to hold the QP1 entries */ rcfw->qp_tbl_size = qp_tbl_sz + 1; rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), @@ -624,6 +971,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!rcfw->qp_tbl) goto fail; + rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; + return 0; fail: @@ -636,6 +985,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) struct bnxt_qplib_creq_ctx *creq; creq = &rcfw->creq; + + if (!creq->requested) + return; + tasklet_disable(&creq->creq_tasklet); /* Mask h/w interrupts */ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false); @@ -644,17 +997,17 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) if (kill) tasklet_kill(&creq->creq_tasklet); - if (creq->requested) { - free_irq(creq->msix_vec, rcfw); - creq->requested = false; - } + free_irq(creq->msix_vec, rcfw); + kfree(creq->irq_name); + creq->irq_name = NULL; + creq->requested = false; + atomic_set(&rcfw->rcfw_intr_enabled, 0); } void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { struct bnxt_qplib_creq_ctx *creq; struct bnxt_qplib_cmdq_ctx *cmdq; - unsigned long indx; 
creq = &rcfw->creq; cmdq = &rcfw->cmdq; @@ -664,11 +1017,6 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) iounmap(cmdq->cmdq_mbox.reg.bar_reg); iounmap(creq->creq_db.reg.bar_reg); - indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth); - if (indx != rcfw->cmdq_depth) - dev_err(&rcfw->pdev->dev, - "disabling RCFW with pending cmd-bit %lx\n", indx); - cmdq->cmdq_mbox.reg.bar_reg = NULL; creq->creq_db.reg.bar_reg = NULL; creq->aeq_handler = NULL; @@ -679,9 +1027,11 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init) { struct bnxt_qplib_creq_ctx *creq; + struct bnxt_qplib_res *res; int rc; creq = &rcfw->creq; + res = rcfw->res; if (creq->requested) return -EFAULT; @@ -691,24 +1041,32 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq); else tasklet_enable(&creq->creq_tasklet); + + creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s", + pci_name(res->pdev)); + if (!creq->irq_name) + return -ENOMEM; rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, - "bnxt_qplib_creq", rcfw); - if (rc) + creq->irq_name, rcfw); + if (rc) { + kfree(creq->irq_name); + creq->irq_name = NULL; + tasklet_disable(&creq->creq_tasklet); return rc; + } creq->requested = true; - bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); + bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true); + atomic_inc(&rcfw->rcfw_intr_enabled); return 0; } -static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf) +static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw) { struct bnxt_qplib_cmdq_mbox *mbox; resource_size_t bar_reg; struct pci_dev *pdev; - u16 prod_offt; - int rc = 0; pdev = rcfw->pdev; mbox = &rcfw->cmdq.cmdq_mbox; @@ -733,11 +1091,10 @@ static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf) return -ENOMEM; } - prod_offt = is_vf ? 
RCFW_VF_COMM_PROD_OFFSET : - RCFW_PF_COMM_PROD_OFFSET; - mbox->prod = (void __iomem *)(mbox->reg.bar_reg + prod_offt); + mbox->prod = (void __iomem *)(mbox->reg.bar_reg + + RCFW_PF_VF_COMM_PROD_OFFSET); mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET); - return rc; + return 0; } static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt) @@ -798,7 +1155,7 @@ static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, int msix_vector, - int cp_bar_reg_off, int virt_fn, + int cp_bar_reg_off, aeq_handler_t aeq_handler) { struct bnxt_qplib_cmdq_ctx *cmdq; @@ -818,7 +1175,7 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, creq->stats.creq_func_event_processed = 0; creq->aeq_handler = aeq_handler; - rc = bnxt_qplib_map_cmdq_mbox(rcfw, virt_fn); + rc = bnxt_qplib_map_cmdq_mbox(rcfw); if (rc) return rc; @@ -834,6 +1191,7 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, return rc; } + sema_init(&rcfw->rcfw_inflight, RCFW_CMD_NON_BLOCKING_SHADOW_QD); bnxt_qplib_start_rcfw(rcfw); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index dd5651478bbb..7b31bee3e000 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -45,13 +45,13 @@ #define RCFW_COMM_PCI_BAR_REGION 0 #define RCFW_COMM_CONS_PCI_BAR_REGION 2 #define RCFW_COMM_BASE_OFFSET 0x600 -#define RCFW_PF_COMM_PROD_OFFSET 0xc -#define RCFW_VF_COMM_PROD_OFFSET 0xc +#define RCFW_PF_VF_COMM_PROD_OFFSET 0xc #define RCFW_COMM_TRIG_OFFSET 0x100 #define RCFW_COMM_SIZE 0x104 #define RCFW_DBR_PCI_BAR_REGION 2 #define RCFW_DBR_BASE_PAGE_SHIFT 12 +#define RCFW_FW_STALL_MAX_TIMEOUT 40 /* Cmdq contains a fix number of a 16-Byte slots */ struct bnxt_qplib_cmdqe { @@ -67,11 +67,12 @@ static inline void bnxt_qplib_rcfw_cmd_prep(struct cmdq_base *req, req->cmd_size = cmd_size; } +/* Shadow queue depth for non blocking command */ +#define RCFW_CMD_NON_BLOCKING_SHADOW_QD 64 #define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */ /* CMDQ elements */ -#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256 -#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192 +#define BNXT_QPLIB_CMDQE_MAX_CNT 8192 #define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS) static inline u32 bnxt_qplib_cmdqe_npages(u32 depth) @@ -89,6 +90,26 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); } +/* Get the number of command units required for the req. 
The + * function returns correct value only if called before + * setting using bnxt_qplib_set_cmd_slots + */ +static inline u32 bnxt_qplib_get_cmd_slots(struct cmdq_base *req) +{ + u32 cmd_units = 0; + + if (HAS_TLV_HEADER(req)) { + struct roce_tlv *tlv_req = (struct roce_tlv *)req; + + cmd_units = tlv_req->total_size; + } else { + cmd_units = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) / + BNXT_QPLIB_CMDQE_UNITS; + } + + return cmd_units; +} + static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req) { u32 cmd_byte = 0; @@ -106,11 +127,10 @@ static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req) return cmd_byte; } -#define RCFW_MAX_COOKIE_VALUE 0x7FFF +#define RCFW_MAX_COOKIE_VALUE (BNXT_QPLIB_CMDQE_MAX_CNT - 1) #define RCFW_CMD_IS_BLOCKING 0x8000 -#define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */ -#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL +#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL /* Crsq buf is 1024-Byte */ struct bnxt_qplib_crsbe { @@ -132,6 +152,12 @@ typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *); struct bnxt_qplib_crsqe { struct creq_qp_event *resp; u32 req_size; + /* Free slots at the time of submission */ + u32 free_slots; + u8 opcode; + bool is_waiter_alive; + bool is_internal_cmd; + bool is_in_used; }; struct bnxt_qplib_rcfw_sbuf { @@ -149,7 +175,7 @@ struct bnxt_qplib_qp_node { #define FIRMWARE_INITIALIZED_FLAG (0) #define FIRMWARE_FIRST_FLAG (31) -#define FIRMWARE_TIMED_OUT (3) +#define FIRMWARE_STALL_DETECTED (3) #define ERR_DEVICE_DETACHED (4) struct bnxt_qplib_cmdq_mbox { @@ -163,7 +189,7 @@ struct bnxt_qplib_cmdq_ctx { struct bnxt_qplib_cmdq_mbox cmdq_mbox; wait_queue_head_t waitq; unsigned long flags; - unsigned long *cmdq_bitmap; + unsigned long last_seen; u32 seq_num; }; @@ -186,6 +212,7 @@ struct bnxt_qplib_creq_ctx { u16 ring_id; int msix_vec; bool requested; /*irq handler installed */ + char *irq_name; }; /* RCFW Communication Channels */ @@ -200,6 +227,11 @@ struct bnxt_qplib_rcfw { u64 oos_prev; u32 init_oos_stats; u32 cmdq_depth; + atomic_t rcfw_intr_enabled; + struct semaphore rcfw_inflight; + atomic_t timeout_send; + /* cached from chip cctx for quick reference in slow path */ + u16 max_timeout; }; struct bnxt_qplib_cmdqmsg { @@ -234,7 +266,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init); int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, int msix_vector, - int cp_bar_reg_off, int virt_fn, + int cp_bar_reg_off, aeq_handler_t aeq_handler); struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 81b0c5e879f9..5fd8f7c90bb0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -696,44 +696,76 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res, } /* DPIs */ -int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit, - struct bnxt_qplib_dpi *dpi, - void *app) +int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res, + struct bnxt_qplib_dpi *dpi, + void *app, u8 type) { + struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl; + struct bnxt_qplib_reg_desc *reg; u32 bit_num; + u64 umaddr; + + reg = &dpit->wcreg; + mutex_lock(&res->dpi_tbl_lock); bit_num = find_first_bit(dpit->tbl, dpit->max); - if (bit_num == dpit->max) + if (bit_num == dpit->max) { + mutex_unlock(&res->dpi_tbl_lock); return -ENOMEM; + } /* Found unused DPI */ clear_bit(bit_num, dpit->tbl); 
dpit->app_tbl[bit_num] = app; - dpi->dpi = bit_num; - dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE); - dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE); + dpi->bit = bit_num; + dpi->dpi = bit_num + (reg->offset - dpit->ucreg.offset) / PAGE_SIZE; + + umaddr = reg->bar_base + reg->offset + bit_num * PAGE_SIZE; + dpi->umdbr = umaddr; + + switch (type) { + case BNXT_QPLIB_DPI_TYPE_KERNEL: + /* privileged dbr was already mapped just initialize it. */ + dpi->umdbr = dpit->ucreg.bar_base + + dpit->ucreg.offset + bit_num * PAGE_SIZE; + dpi->dbr = dpit->priv_db; + dpi->dpi = dpi->bit; + break; + case BNXT_QPLIB_DPI_TYPE_WC: + dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE); + break; + default: + dpi->dbr = ioremap(umaddr, PAGE_SIZE); + break; + } + dpi->type = type; + mutex_unlock(&res->dpi_tbl_lock); return 0; + } int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, - struct bnxt_qplib_dpi_tbl *dpit, - struct bnxt_qplib_dpi *dpi) + struct bnxt_qplib_dpi *dpi) { - if (dpi->dpi >= dpit->max) { - dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi); - return -EINVAL; - } - if (test_and_set_bit(dpi->dpi, dpit->tbl)) { - dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n", - dpi->dpi); + struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl; + + mutex_lock(&res->dpi_tbl_lock); + if (dpi->dpi && dpi->type != BNXT_QPLIB_DPI_TYPE_KERNEL) + pci_iounmap(res->pdev, dpi->dbr); + + if (test_and_set_bit(dpi->bit, dpit->tbl)) { + dev_warn(&res->pdev->dev, + "Freeing an unused DPI? dpi = %d, bit = %d\n", + dpi->dpi, dpi->bit); + mutex_unlock(&res->dpi_tbl_lock); return -EINVAL; } if (dpit->app_tbl) - dpit->app_tbl[dpi->dpi] = NULL; + dpit->app_tbl[dpi->bit] = NULL; memset(dpi, 0, sizeof(*dpi)); - + mutex_unlock(&res->dpi_tbl_lock); return 0; } @@ -742,52 +774,38 @@ static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res, { kfree(dpit->tbl); kfree(dpit->app_tbl); - if (dpit->dbr_bar_reg_iomem) - pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem); - memset(dpit, 0, sizeof(*dpit)); + dpit->tbl = NULL; + dpit->app_tbl = NULL; + dpit->max = 0; } -static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, - struct bnxt_qplib_dpi_tbl *dpit, - u32 dbr_offset) +static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, + struct bnxt_qplib_dev_attr *dev_attr) { - u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION; - resource_size_t bar_reg_base; - u32 dbr_len, bytes; - - if (dpit->dbr_bar_reg_iomem) { - dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n", - dbr_bar_reg); - return -EALREADY; - } - - bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg); - if (!bar_reg_base) { - dev_err(&res->pdev->dev, "BAR region %d resc start failed\n", - dbr_bar_reg); - return -ENOMEM; - } + struct bnxt_qplib_dpi_tbl *dpit; + struct bnxt_qplib_reg_desc *reg; + unsigned long bar_len; + u32 dbr_offset; + u32 bytes; - dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset; - if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) { - dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len); - return -ENOMEM; - } + dpit = &res->dpi_tbl; + reg = &dpit->wcreg; - dpit->dbr_bar_reg_iomem = ioremap(bar_reg_base + dbr_offset, - dbr_len); - if (!dpit->dbr_bar_reg_iomem) { - dev_err(&res->pdev->dev, - "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg); - return -ENOMEM; + if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) { + /* Offest should come from L2 driver */ + dbr_offset = dev_attr->l2_db_size; + dpit->ucreg.offset = dbr_offset; + dpit->wcreg.offset = dbr_offset; } - dpit->unmapped_dbr = 
bar_reg_base + dbr_offset; - dpit->max = dbr_len / PAGE_SIZE; + bar_len = pci_resource_len(res->pdev, reg->bar_id); + dpit->max = (bar_len - reg->offset) / PAGE_SIZE; + if (dev_attr->max_dpi) + dpit->max = min_t(u32, dpit->max, dev_attr->max_dpi); - dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL); + dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL); if (!dpit->app_tbl) - goto unmap_io; + return -ENOMEM; bytes = dpit->max >> 3; if (!bytes) @@ -797,17 +815,14 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, if (!dpit->tbl) { kfree(dpit->app_tbl); dpit->app_tbl = NULL; - goto unmap_io; + return -ENOMEM; } memset((u8 *)dpit->tbl, 0xFF, bytes); + dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset; return 0; -unmap_io: - pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem); - dpit->dbr_bar_reg_iomem = NULL; - return -ENOMEM; } /* Stats */ @@ -874,7 +889,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, if (rc) goto fail; - rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size); + rc = bnxt_qplib_alloc_dpi_tbl(res, dev_attr); if (rc) goto fail; @@ -884,6 +899,46 @@ fail: return rc; } +void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res) +{ + struct bnxt_qplib_reg_desc *reg; + + reg = &res->dpi_tbl.ucreg; + if (reg->bar_reg) + pci_iounmap(res->pdev, reg->bar_reg); + reg->bar_reg = NULL; + reg->bar_base = 0; + reg->len = 0; + reg->bar_id = 0; +} + +int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res) +{ + struct bnxt_qplib_reg_desc *ucreg; + struct bnxt_qplib_reg_desc *wcreg; + + wcreg = &res->dpi_tbl.wcreg; + wcreg->bar_id = RCFW_DBR_PCI_BAR_REGION; + wcreg->bar_base = pci_resource_start(res->pdev, wcreg->bar_id); + + ucreg = &res->dpi_tbl.ucreg; + ucreg->bar_id = RCFW_DBR_PCI_BAR_REGION; + ucreg->bar_base = pci_resource_start(res->pdev, ucreg->bar_id); + ucreg->len = ucreg->offset + PAGE_SIZE; + if (!ucreg->len || ((ucreg->len & (PAGE_SIZE - 1)) != 0)) { + dev_err(&res->pdev->dev, "QPLIB: invalid dbr length %d", + (int)ucreg->len); + return -EINVAL; + } + ucreg->bar_reg = ioremap(ucreg->bar_base, ucreg->len); + if (!ucreg->bar_reg) { + dev_err(&res->pdev->dev, "privileged dpi map failed!"); + return -ENOMEM; + } + + return 0; +} + int bnxt_qplib_determine_atomics(struct pci_dev *dev) { int comp; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 982e2c96dac2..d850a553821e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -47,7 +47,7 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; struct bnxt_qplib_drv_modes { u8 wqe_mode; - /* Other modes to follow here */ + bool db_push; }; struct bnxt_qplib_chip_ctx { @@ -55,9 +55,14 @@ struct bnxt_qplib_chip_ctx { u8 chip_rev; u8 chip_metal; u16 hw_stats_size; + u16 hwrm_cmd_max_timeout; struct bnxt_qplib_drv_modes modes; + u64 hwrm_intf_ver; }; +#define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000 +#define BNXT_QPLIB_DBR_VF_DB_OFFSET 0x4000 + #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) @@ -109,6 +114,7 @@ enum bnxt_qplib_hwrm_pg_size { struct bnxt_qplib_reg_desc { u8 bar_id; resource_size_t bar_base; + unsigned long offset; void __iomem *bar_reg; size_t len; }; @@ -185,18 +191,27 @@ struct bnxt_qplib_sgid_tbl { u8 *vlan; }; +enum { + BNXT_QPLIB_DPI_TYPE_KERNEL = 0, + BNXT_QPLIB_DPI_TYPE_UC = 1, + BNXT_QPLIB_DPI_TYPE_WC = 2 +}; + 
struct bnxt_qplib_dpi { u32 dpi; + u32 bit; void __iomem *dbr; u64 umdbr; + u8 type; }; struct bnxt_qplib_dpi_tbl { void **app_tbl; unsigned long *tbl; u16 max; - void __iomem *dbr_bar_reg_iomem; - u64 unmapped_dbr; + struct bnxt_qplib_reg_desc ucreg; /* Hold entire DB bar. */ + struct bnxt_qplib_reg_desc wcreg; + void __iomem *priv_db; }; struct bnxt_qplib_stats { @@ -241,7 +256,6 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_tqm_ctx tqm_ctx; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; - u64 hwrm_intf_ver; }; struct bnxt_qplib_res { @@ -253,6 +267,8 @@ struct bnxt_qplib_res { struct bnxt_qplib_pd_tbl pd_tbl; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; + /* To protect the dpi table bit map */ + struct mutex dpi_tbl_lock; bool prio; bool is_vf; }; @@ -344,11 +360,10 @@ int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd_tbl *pd_tbl, struct bnxt_qplib_pd *pd); -int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit, - struct bnxt_qplib_dpi *dpi, - void *app); +int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res, + struct bnxt_qplib_dpi *dpi, + void *app, u8 type); int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, - struct bnxt_qplib_dpi_tbl *dpi_tbl, struct bnxt_qplib_dpi *dpi); void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res); int bnxt_qplib_init_res(struct bnxt_qplib_res *res); @@ -361,6 +376,9 @@ void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5); +int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res); +void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res); + int bnxt_qplib_determine_atomics(struct pci_dev *dev); static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index b967a17a44be..ab45f9d4bb02 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -170,6 +170,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); } + if (rcfw->res->cctx->hwrm_intf_ver >= HWRM_VERSION_DEV_ATTR_MAX_DPI) + attr->max_dpi = le32_to_cpu(sb->max_dpi); + attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); @@ -233,10 +236,6 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_rcfw *rcfw = res->rcfw; int index; - if (!sgid_tbl) { - dev_err(&res->pdev->dev, "SGID table not allocated\n"); - return -EINVAL; - } /* Do we need a sgid_lock here? */ if (!sgid_tbl->active) { dev_err(&res->pdev->dev, "SGID table has no active entries\n"); @@ -297,10 +296,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_rcfw *rcfw = res->rcfw; int i, free_idx; - if (!sgid_tbl) { - dev_err(&res->pdev->dev, "SGID table not allocated\n"); - return -EINVAL; - } /* Do we need a sgid_lock here? 
*/ if (sgid_tbl->active == sgid_tbl->max) { dev_err(&res->pdev->dev, "SGID table is full\n"); @@ -468,13 +463,14 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, return 0; } -void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block) +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_destroy_ah_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct cmdq_destroy_ah req = {}; + int rc; /* Clean up the AH table in the device */ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, @@ -485,7 +481,8 @@ void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), block); - bnxt_qplib_rcfw_send_message(rcfw, &msg); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); + return rc; } /* MRW */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 5de874659cdf..264ef3cedc45 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -72,6 +72,7 @@ struct bnxt_qplib_dev_attr { u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; bool is_atomic; u16 dev_cap_flags; + u32 max_dpi; }; struct bnxt_qplib_pd { @@ -327,8 +328,8 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx); int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bool block); -void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block); +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index e819e4032490..f190111840e9 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -128,13 +128,8 @@ struct erdma_devattr { int numa_node; enum erdma_cc_alg cc; - u32 grp_num; u32 irq_num; - bool disable_dwqe; - u16 dwqe_pages; - u16 dwqe_entries; - u32 max_qp; u32 max_send_wr; u32 max_recv_wr; @@ -215,15 +210,6 @@ struct erdma_dev { u32 next_alloc_qpn; u32 next_alloc_cqn; - spinlock_t db_bitmap_lock; - /* We provide max 64 uContexts that each has one SQ doorbell Page. */ - DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); - /* - * We provide max 496 uContexts that each has one SQ normal Db, - * and one directWQE db. - */ - DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); - atomic_t num_ctx; struct list_head cep_list; }; @@ -268,6 +254,8 @@ static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg, return FIELD_GET(filed_mask, val); } +#define ERDMA_GET(val, name) FIELD_GET(ERDMA_CMD_##name##_MASK, val) + int erdma_cmdq_init(struct erdma_dev *dev); void erdma_finish_cmdq_init(struct erdma_dev *dev); void erdma_cmdq_destroy(struct erdma_dev *dev); diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 76ce2856be28..a882b57aa118 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -82,19 +82,6 @@ #define ERDMA_BAR_CQDB_SPACE_OFFSET \ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) -/* Doorbell page resources related. 
*/ -/* - * Max # of parallelly issued directSQE is 3072 per device, - * hardware organizes this into 24 group, per group has 128 credits. - */ -#define ERDMA_DWQE_MAX_GRP_CNT 24 -#define ERDMA_DWQE_NUM_PER_GRP 128 - -#define ERDMA_DWQE_TYPE0_CNT 64 -#define ERDMA_DWQE_TYPE1_CNT 496 -/* type1 DB contains 2 DBs, takes 256Byte. */ -#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16 - #define ERDMA_SDB_SHARED_PAGE_INDEX 95 /* Doorbell related. */ @@ -134,7 +121,7 @@ /* CMDQ related. */ #define ERDMA_CMDQ_MAX_OUTSTANDING 128 -#define ERDMA_CMDQ_SQE_SIZE 64 +#define ERDMA_CMDQ_SQE_SIZE 128 /* cmdq sub module definition. */ enum CMDQ_WQE_SUB_MOD { @@ -159,6 +146,9 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, CMDQ_OPCODE_CONF_MTU = 3, + CMDQ_OPCODE_CONF_DEVICE = 5, + CMDQ_OPCODE_ALLOC_DB = 8, + CMDQ_OPCODE_FREE_DB = 9, }; /* cmdq-SQE HDR */ @@ -196,11 +186,41 @@ struct erdma_cmdq_destroy_eq_req { u8 qtype; }; +/* config device cfg */ +#define ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK BIT(31) +#define ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK GENMASK(4, 0) + +struct erdma_cmdq_config_device_req { + u64 hdr; + u32 cfg; + u32 rsvd[5]; +}; + struct erdma_cmdq_config_mtu_req { u64 hdr; u32 mtu; }; +/* ext db requests(alloc and free) cfg */ +#define ERDMA_CMD_EXT_DB_CQ_EN_MASK BIT(2) +#define ERDMA_CMD_EXT_DB_RQ_EN_MASK BIT(1) +#define ERDMA_CMD_EXT_DB_SQ_EN_MASK BIT(0) + +struct erdma_cmdq_ext_db_req { + u64 hdr; + u32 cfg; + u16 rdb_off; + u16 sdb_off; + u16 rsvd0; + u16 cdb_off; + u32 rsvd1[3]; +}; + +/* alloc db response qword 0 definition */ +#define ERDMA_CMD_ALLOC_DB_RESP_RDB_MASK GENMASK_ULL(63, 48) +#define ERDMA_CMD_ALLOC_DB_RESP_CDB_MASK GENMASK_ULL(47, 32) +#define ERDMA_CMD_ALLOC_DB_RESP_SDB_MASK GENMASK_ULL(15, 0) + /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) #define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) @@ -209,8 +229,12 @@ struct erdma_cmdq_config_mtu_req { /* create_cq cfg1 */ #define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16) #define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15) +#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11) #define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0) +/* create_cq cfg2 */ +#define ERDMA_CMD_CREATE_CQ_DB_CFG_MASK GENMASK(15, 0) + struct erdma_cmdq_create_cq_req { u64 hdr; u32 cfg0; @@ -219,6 +243,7 @@ struct erdma_cmdq_create_cq_req { u32 cfg1; u64 cq_db_info_addr; u32 first_page_offset; + u32 cfg2; }; /* regmr/deregmr cfg0 */ @@ -278,6 +303,7 @@ struct erdma_cmdq_modify_qp_req { /* create qp cqn_mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28) +#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25) #define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0) /* create qp mtt_cfg */ @@ -285,6 +311,10 @@ struct erdma_cmdq_modify_qp_req { #define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1) #define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0) +/* create qp db cfg */ +#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16) +#define ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK GENMASK(15, 0) + #define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0) struct erdma_cmdq_create_qp_req { @@ -299,6 +329,11 @@ struct erdma_cmdq_create_qp_req { u32 rq_mtt_cfg; u64 sq_db_info_dma_addr; u64 rq_db_info_dma_addr; + + u64 sq_mtt_entry[3]; + u64 rq_mtt_entry[3]; + + u32 db_cfg; }; struct erdma_cmdq_destroy_qp_req { @@ -329,6 +364,7 @@ struct erdma_cmdq_reflush_req { enum { ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3, }; #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 
0) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 7c74abeee864..0880c79a978c 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -130,33 +130,6 @@ static irqreturn_t erdma_comm_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static void erdma_dwqe_resource_init(struct erdma_dev *dev) -{ - int total_pages, type0, type1; - - dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG); - - if (dev->attrs.grp_num < 4) - dev->attrs.disable_dwqe = true; - else - dev->attrs.disable_dwqe = false; - - /* One page contains 4 goups. */ - total_pages = dev->attrs.grp_num * 4; - - if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) { - dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT; - type0 = ERDMA_DWQE_TYPE0_CNT; - type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - } else { - type1 = total_pages / 3; - type0 = total_pages - type1 - 1; - } - - dev->attrs.dwqe_pages = type0; - dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; -} - static int erdma_request_vectors(struct erdma_dev *dev) { int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC); @@ -199,8 +172,6 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; - erdma_dwqe_resource_init(dev); - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ERDMA_PCI_WIDTH)); if (ret) @@ -426,6 +397,22 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) return err; } +static int erdma_device_config(struct erdma_dev *dev) +{ + struct erdma_cmdq_config_device_req req = {}; + + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB)) + return 0; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CONF_DEVICE); + + req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) | + FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1); + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + static int erdma_res_cb_init(struct erdma_dev *dev) { int i, j; @@ -512,6 +499,10 @@ static int erdma_ib_device_add(struct pci_dev *pdev) if (ret) return ret; + ret = erdma_device_config(dev); + if (ret) + return ret; + ibdev->node_type = RDMA_NODE_RNIC; memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); @@ -537,10 +528,6 @@ static int erdma_ib_device_add(struct pci_dev *pdev) if (ret) return ret; - spin_lock_init(&dev->db_bitmap_lock); - bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT); - bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT); - atomic_set(&dev->num_ctx, 0); mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 83e1b0d55977..517676fbb8b1 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -19,10 +19,11 @@ #include "erdma_cm.h" #include "erdma_verbs.h" -static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp) +static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) { - struct erdma_cmdq_create_qp_req req; + struct erdma_dev *dev = to_edev(qp->ibqp.device); struct erdma_pd *pd = to_epd(qp->ibqp.pd); + struct erdma_cmdq_create_qp_req req; struct erdma_uqp *user_qp; u64 resp0, resp1; int err; @@ -93,6 +94,16 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp) req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; + + if 
(uctx->ext_db.enable) { + req.sq_cqn_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1); + req.db_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK, + uctx->ext_db.sdb_off) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK, + uctx->ext_db.rdb_off); + } } err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, @@ -146,11 +157,12 @@ post_cmd: return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } -static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq) +static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) { + struct erdma_dev *dev = to_edev(cq->ibcq.device); struct erdma_cmdq_create_cq_req req; - u32 page_size; struct erdma_mem *mtt; + u32 page_size; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_CREATE_CQ); @@ -192,6 +204,13 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq) req.first_page_offset = mtt->page_offset; req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; + + if (uctx->ext_db.enable) { + req.cfg1 |= FIELD_PREP( + ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1); + req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK, + uctx->ext_db.cdb_off); + } } return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); @@ -753,7 +772,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->attrs.state = ERDMA_QP_STATE_IDLE; INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); - ret = create_qp_cmd(dev, qp); + ret = create_qp_cmd(uctx, qp); if (ret) goto err_out_cmd; @@ -1130,62 +1149,73 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) kfree(entry); } -#define ERDMA_SDB_PAGE 0 -#define ERDMA_SDB_ENTRY 1 -#define ERDMA_SDB_SHARED 2 - -static void alloc_db_resources(struct erdma_dev *dev, - struct erdma_ucontext *ctx) +static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx, + bool ext_db_en) { - u32 bitmap_idx; - struct erdma_devattr *attrs = &dev->attrs; - - if (attrs->disable_dwqe) - goto alloc_normal_db; - - /* Try to alloc independent SDB page. */ - spin_lock(&dev->db_bitmap_lock); - bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages); - if (bitmap_idx != attrs->dwqe_pages) { - set_bit(bitmap_idx, dev->sdb_page); - spin_unlock(&dev->db_bitmap_lock); - - ctx->sdb_type = ERDMA_SDB_PAGE; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = bitmap_idx; - ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (bitmap_idx << PAGE_SHIFT); - ctx->sdb_page_off = 0; + struct erdma_cmdq_ext_db_req req = {}; + u64 val0, val1; + int ret; - return; + /* + * CAP_SYS_RAWIO is required if hardware does not support extend + * doorbell mechanism. 
+ */ + if (!ext_db_en && !capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (!ext_db_en) { + ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET; + ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; + ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; + return 0; } - bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries); - if (bitmap_idx != attrs->dwqe_entries) { - set_bit(bitmap_idx, dev->sdb_entry); - spin_unlock(&dev->db_bitmap_lock); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_ALLOC_DB); + + req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) | + FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | + FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1); + if (ret) + return ret; - ctx->sdb_type = ERDMA_SDB_ENTRY; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = attrs->dwqe_pages + - bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->ext_db.enable = true; + ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB); + ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB); + ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB); - ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (ctx->sdb_page_idx << PAGE_SHIFT); + ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT); + ctx->cdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT); + ctx->rdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT); + + return 0; +} +static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) +{ + struct erdma_cmdq_ext_db_req req = {}; + int ret; + + if (!ctx->ext_db.enable) return; - } - spin_unlock(&dev->db_bitmap_lock); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_FREE_DB); + + req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) | + FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | + FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); -alloc_normal_db: - ctx->sdb_type = ERDMA_SDB_SHARED; - ctx->sdb_idx = 0; - ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX; - ctx->sdb_page_off = 0; + req.sdb_off = ctx->ext_db.sdb_off; + req.rdb_off = ctx->ext_db.rdb_off; + req.cdb_off = ctx->ext_db.cdb_off; - ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (ret) + ibdev_err_ratelimited(&dev->ibdev, + "free db resources failed %d", ret); } static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) @@ -1207,71 +1237,67 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) goto err_out; } - INIT_LIST_HEAD(&ctx->dbrecords_page_list); - mutex_init(&ctx->dbrecords_page_mutex); - - alloc_db_resources(dev, ctx); - - ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; - ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; - if (udata->outlen < sizeof(uresp)) { ret = -EINVAL; goto err_out; } + INIT_LIST_HEAD(&ctx->dbrecords_page_list); + mutex_init(&ctx->dbrecords_page_mutex); + + ret = alloc_db_resources(dev, ctx, + !!(dev->attrs.cap_flags & + ERDMA_DEV_CAP_FLAGS_EXTEND_DB)); + if (ret) + goto err_out; + ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert( ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); if (!ctx->sq_db_mmap_entry) { ret = -ENOMEM; - goto err_out; + goto err_free_ext_db; } ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert( ctx, (void *)ctx->rdb, PAGE_SIZE, 
ERDMA_MMAP_IO_NC, &uresp.rdb); if (!ctx->rq_db_mmap_entry) { ret = -EINVAL; - goto err_out; + goto err_put_mmap_entries; } ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert( ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb); if (!ctx->cq_db_mmap_entry) { ret = -EINVAL; - goto err_out; + goto err_put_mmap_entries; } uresp.dev_id = dev->pdev->device; - uresp.sdb_type = ctx->sdb_type; - uresp.sdb_offset = ctx->sdb_page_off; ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) - goto err_out; + goto err_put_mmap_entries; return 0; -err_out: +err_put_mmap_entries: erdma_uctx_user_mmap_entries_remove(ctx); + +err_free_ext_db: + free_db_resources(dev, ctx); + +err_out: atomic_dec(&dev->num_ctx); return ret; } void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) { - struct erdma_ucontext *ctx = to_ectx(ibctx); struct erdma_dev *dev = to_edev(ibctx->device); - - spin_lock(&dev->db_bitmap_lock); - if (ctx->sdb_type == ERDMA_SDB_PAGE) - clear_bit(ctx->sdb_idx, dev->sdb_page); - else if (ctx->sdb_type == ERDMA_SDB_ENTRY) - clear_bit(ctx->sdb_idx, dev->sdb_entry); + struct erdma_ucontext *ctx = to_ectx(ibctx); erdma_uctx_user_mmap_entries_remove(ctx); - - spin_unlock(&dev->db_bitmap_lock); - + free_db_resources(dev, ctx); atomic_dec(&dev->num_ctx); } @@ -1438,7 +1464,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out_xa; } - ret = create_cq_cmd(dev, cq); + ret = create_cq_cmd(ctx, cq); if (ret) goto err_free_res; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 131cf5f40982..429fc3063f98 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -31,13 +31,18 @@ struct erdma_user_mmap_entry { u8 mmap_flag; }; +struct erdma_ext_db_info { + bool enable; + u16 sdb_off; + u16 rdb_off; + u16 cdb_off; +}; + struct erdma_ucontext { struct ib_ucontext ibucontext; - u32 sdb_type; - u32 sdb_idx; - u32 sdb_page_idx; - u32 sdb_page_off; + struct erdma_ext_db_info ext_db; + u64 sdb; u64 rdb; u64 cdb; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 8973a081d641..e7d831330278 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -215,11 +215,11 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; ret = sdma_txadd_page(dd, - NULL, txreq, skb_frag_page(frag), frag->bv_offset, - skb_frag_size(frag)); + skb_frag_size(frag), + NULL, NULL, NULL); if (unlikely(ret)) break; } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 1cea8b0c78e0..7a51f7d73b61 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -19,8 +19,7 @@ static int mmu_notifier_range_start(struct mmu_notifier *, const struct mmu_notifier_range *); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); -static void do_remove(struct mmu_rb_handler *handler, - struct list_head *del_list); +static void release_immediate(struct kref *refcount); static void handle_remove(struct work_struct *work); static const struct mmu_notifier_ops mn_opts = { @@ -106,7 +105,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) } spin_unlock_irqrestore(&handler->lock, flags); - do_remove(handler, &del_list); + while (!list_empty(&del_list)) { + rbnode = list_first_entry(&del_list, struct mmu_rb_node, list); + 
list_del(&rbnode->list); + kref_put(&rbnode->refcount, release_immediate); + } /* Now the mm may be freed. */ mmdrop(handler->mn.mm); @@ -121,7 +124,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, unsigned long flags; int ret = 0; - trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len); + trace_hfi1_mmu_rb_insert(mnode); if (current->mm != handler->mn.mm) return -EPERM; @@ -134,12 +137,6 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, } __mmu_int_rb_insert(mnode, &handler->root); list_add_tail(&mnode->list, &handler->lru_list); - - ret = handler->ops->insert(handler->ops_arg, mnode); - if (ret) { - __mmu_int_rb_remove(mnode, &handler->root); - list_del(&mnode->list); /* remove from LRU list */ - } mnode->handler = handler; unlock: spin_unlock_irqrestore(&handler->lock, flags); @@ -183,6 +180,49 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } +/* + * Must NOT call while holding mnode->handler->lock. + * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a + * spinlock. + */ +static void release_immediate(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + trace_hfi1_mmu_release_node(mnode); + mnode->handler->ops->remove(mnode->handler->ops_arg, mnode); +} + +/* Caller must hold mnode->handler->lock */ +static void release_nolock(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + list_move(&mnode->list, &mnode->handler->del_list); + queue_work(mnode->handler->wq, &mnode->handler->del_work); +} + +/* + * struct mmu_rb_node->refcount kref_put() callback. + * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues + * handler->del_work on handler->wq. + * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root. + * Acquires mmu_rb_node->handler->lock; do not call while already holding + * handler->lock. 
+ */ +void hfi1_mmu_rb_release(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + struct mmu_rb_handler *handler = mnode->handler; + unsigned long flags; + + spin_lock_irqsave(&handler->lock, flags); + list_move(&mnode->list, &mnode->handler->del_list); + spin_unlock_irqrestore(&handler->lock, flags); + queue_work(handler->wq, &handler->del_work); +} + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) { struct mmu_rb_node *rbnode, *ptr; @@ -197,6 +237,10 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) spin_lock_irqsave(&handler->lock, flags); list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) { + /* refcount == 1 implies mmu_rb_handler has only rbnode ref */ + if (kref_read(&rbnode->refcount) > 1) + continue; + if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, &stop)) { __mmu_int_rb_remove(rbnode, &handler->root); @@ -209,7 +253,8 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) spin_unlock_irqrestore(&handler->lock, flags); list_for_each_entry_safe(rbnode, ptr, &del_list, list) { - handler->ops->remove(handler->ops_arg, rbnode); + trace_hfi1_mmu_rb_evict(rbnode); + kref_put(&rbnode->refcount, release_immediate); } } @@ -221,7 +266,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, struct rb_root_cached *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; - bool added = false; spin_lock_irqsave(&handler->lock, flags); for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1); @@ -229,40 +273,18 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, /* Guard against node removal. */ ptr = __mmu_int_rb_iter_next(node, range->start, range->end - 1); - trace_hfi1_mmu_mem_invalidate(node->addr, node->len); - if (handler->ops->invalidate(handler->ops_arg, node)) { - __mmu_int_rb_remove(node, root); - /* move from LRU list to delete list */ - list_move(&node->list, &handler->del_list); - added = true; - } + trace_hfi1_mmu_mem_invalidate(node); + /* Remove from rb tree and lru_list. */ + __mmu_int_rb_remove(node, root); + list_del_init(&node->list); + kref_put(&node->refcount, release_nolock); } spin_unlock_irqrestore(&handler->lock, flags); - if (added) - queue_work(handler->wq, &handler->del_work); - return 0; } /* - * Call the remove function for the given handler and the list. This - * is expected to be called with a delete list extracted from handler. - * The caller should not be holding the handler lock. - */ -static void do_remove(struct mmu_rb_handler *handler, - struct list_head *del_list) -{ - struct mmu_rb_node *node; - - while (!list_empty(del_list)) { - node = list_first_entry(del_list, struct mmu_rb_node, list); - list_del(&node->list); - handler->ops->remove(handler->ops_arg, node); - } -} - -/* * Work queue function to remove all nodes that have been queued up to * be removed. The key feature is that mm->mmap_lock is not being held * and the remove callback can sleep while taking it, if needed. 
@@ -274,11 +296,17 @@ static void handle_remove(struct work_struct *work) del_work); struct list_head del_list; unsigned long flags; + struct mmu_rb_node *node; /* remove anything that is queued to get removed */ spin_lock_irqsave(&handler->lock, flags); list_replace_init(&handler->del_list, &del_list); spin_unlock_irqrestore(&handler->lock, flags); - do_remove(handler, &del_list); + while (!list_empty(&del_list)) { + node = list_first_entry(&del_list, struct mmu_rb_node, list); + list_del(&node->list); + trace_hfi1_mmu_release_node(node); + handler->ops->remove(handler->ops_arg, node); + } } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index c4da064188c9..751dc3fe1e02 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -16,18 +16,14 @@ struct mmu_rb_node { struct rb_node node; struct mmu_rb_handler *handler; struct list_head list; + struct kref refcount; }; -/* - * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is - * allowed to sleep. - */ +/* filter and evict must not sleep. Only remove is allowed to sleep. */ struct mmu_rb_ops { bool (*filter)(struct mmu_rb_node *node, unsigned long addr, unsigned long len); - int (*insert)(void *ops_arg, struct mmu_rb_node *mnode); void (*remove)(void *ops_arg, struct mmu_rb_node *mnode); - int (*invalidate)(void *ops_arg, struct mmu_rb_node *node); int (*evict)(void *ops_arg, struct mmu_rb_node *mnode, void *evict_arg, bool *stop); }; @@ -61,6 +57,8 @@ int hfi1_mmu_rb_register(void *ops_arg, void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler); int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); +void hfi1_mmu_rb_release(struct kref *refcount); + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler, unsigned long addr, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bb2552dd29c1..26c62162759b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1593,7 +1593,20 @@ static inline void sdma_unmap_desc( struct hfi1_devdata *dd, struct sdma_desc *descp) { - system_descriptor_complete(dd, descp); + switch (sdma_mapping_type(descp)) { + case SDMA_MAP_SINGLE: + dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp), + sdma_mapping_len(descp), DMA_TO_DEVICE); + break; + case SDMA_MAP_PAGE: + dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp), + sdma_mapping_len(descp), DMA_TO_DEVICE); + break; + } + + if (descp->pinning_ctx && descp->ctx_put) + descp->ctx_put(descp->pinning_ctx); + descp->pinning_ctx = NULL; } /* @@ -3113,8 +3126,8 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, /* Add descriptor for coalesce buffer */ tx->desc_limit = MAX_DESC; - return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, - addr, tx->tlen); + return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, + addr, tx->tlen, NULL, NULL, NULL); } return 1; @@ -3157,9 +3170,9 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) make_tx_sdma_desc( tx, SDMA_MAP_NONE, - NULL, dd->sdma_pad_phys, - sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1))); + sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)), + NULL, NULL, NULL); tx->num_desc++; _sdma_close_tx(dd, tx); return rval; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 95aaec14c6c2..7fdebab202c4 100644 --- 
a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -594,9 +594,11 @@ static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d) static inline void make_tx_sdma_desc( struct sdma_txreq *tx, int type, - void *pinning_ctx, dma_addr_t addr, - size_t len) + size_t len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { struct sdma_desc *desc = &tx->descp[tx->num_desc]; @@ -613,7 +615,11 @@ static inline void make_tx_sdma_desc( << SDMA_DESC0_PHY_ADDR_SHIFT) | (((u64)len & SDMA_DESC0_BYTE_COUNT_MASK) << SDMA_DESC0_BYTE_COUNT_SHIFT); + desc->pinning_ctx = pinning_ctx; + desc->ctx_put = ctx_put; + if (pinning_ctx && ctx_get) + ctx_get(pinning_ctx); } /* helper to extend txreq */ @@ -645,18 +651,20 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd, static inline int _sdma_txadd_daddr( struct hfi1_devdata *dd, int type, - void *pinning_ctx, struct sdma_txreq *tx, dma_addr_t addr, - u16 len) + u16 len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { int rval = 0; make_tx_sdma_desc( tx, type, - pinning_ctx, - addr, len); + addr, len, + pinning_ctx, ctx_get, ctx_put); WARN_ON(len > tx->tlen); tx->num_desc++; tx->tlen -= len; @@ -676,11 +684,18 @@ static inline int _sdma_txadd_daddr( /** * sdma_txadd_page() - add a page to the sdma_txreq * @dd: the device to use for mapping - * @pinning_ctx: context to be released at descriptor retirement * @tx: tx request to which the page is added * @page: page to map * @offset: offset within the page * @len: length in bytes + * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not + * added if coalesce buffer is used. E.g. pointer to pinned-page + * cache entry for the sdma_desc. + * @ctx_get: optional function to take reference to @pinning_ctx. Not called if + * @pinning_ctx is NULL. + * @ctx_put: optional function to release reference to @pinning_ctx after + * sdma_desc completes. May be called in interrupt context so must + * not sleep. Not called if @pinning_ctx is NULL. * * This is used to add a page/offset/length descriptor. 
* @@ -692,11 +707,13 @@ static inline int _sdma_txadd_daddr( */ static inline int sdma_txadd_page( struct hfi1_devdata *dd, - void *pinning_ctx, struct sdma_txreq *tx, struct page *page, unsigned long offset, - u16 len) + u16 len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { dma_addr_t addr; int rval; @@ -720,7 +737,8 @@ static inline int sdma_txadd_page( return -ENOSPC; } - return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, pinning_ctx, tx, addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len, + pinning_ctx, ctx_get, ctx_put); } /** @@ -754,8 +772,8 @@ static inline int sdma_txadd_daddr( return rval; } - return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, NULL, tx, - addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len, + NULL, NULL, NULL); } /** @@ -801,7 +819,8 @@ static inline int sdma_txadd_kvaddr( return -ENOSPC; } - return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len, + NULL, NULL, NULL); } struct iowait_work; @@ -1034,6 +1053,4 @@ u16 sdma_get_descq_cnt(void); extern uint mod_num_sdma; void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid); - -void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp); #endif diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h index fad946cb5e0d..85ae7293c274 100644 --- a/drivers/infiniband/hw/hfi1/sdma_txreq.h +++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h @@ -20,6 +20,8 @@ struct sdma_desc { /* private: don't use directly */ u64 qw[2]; void *pinning_ctx; + /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */ + void (*ctx_put)(void *ctx); }; /** diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h index 57900ebb7702..82cc12aa3fb8 100644 --- a/drivers/infiniband/hw/hfi1/trace_mmu.h +++ b/drivers/infiniband/hw/hfi1/trace_mmu.h @@ -15,31 +15,53 @@ #define TRACE_SYSTEM hfi1_mmu DECLARE_EVENT_CLASS(hfi1_mmu_rb_template, - TP_PROTO(unsigned long addr, unsigned long len), - TP_ARGS(addr, len), + TP_PROTO(struct mmu_rb_node *node), + TP_ARGS(node), TP_STRUCT__entry(__field(unsigned long, addr) __field(unsigned long, len) + __field(unsigned int, refcount) ), - TP_fast_assign(__entry->addr = addr; - __entry->len = len; + TP_fast_assign(__entry->addr = node->addr; + __entry->len = node->len; + __entry->refcount = kref_read(&node->refcount); ), - TP_printk("MMU node addr 0x%lx, len %lu", + TP_printk("MMU node addr 0x%lx, len %lu, refcount %u", __entry->addr, - __entry->len + __entry->len, + __entry->refcount ) ); DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert, - TP_PROTO(unsigned long addr, unsigned long len), - TP_ARGS(addr, len)); + TP_PROTO(struct mmu_rb_node *node), + TP_ARGS(node)); -DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search, - TP_PROTO(unsigned long addr, unsigned long len), - TP_ARGS(addr, len)); +TRACE_EVENT(hfi1_mmu_rb_search, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len), + TP_STRUCT__entry(__field(unsigned long, addr) + __field(unsigned long, len) + ), + TP_fast_assign(__entry->addr = addr; + __entry->len = len; + ), + TP_printk("MMU node addr 0x%lx, len %lu", + __entry->addr, + __entry->len + ) +); DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate, - TP_PROTO(unsigned long addr, unsigned long len), - TP_ARGS(addr, len)); + TP_PROTO(struct mmu_rb_node *node), + TP_ARGS(node)); + 
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_evict, + TP_PROTO(struct mmu_rb_node *node), + TP_ARGS(node)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_release_node, + TP_PROTO(struct mmu_rb_node *node), + TP_ARGS(node)); #endif /* __HFI1_TRACE_RC_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index ae58b48afe07..02bd62b857b7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -62,18 +62,14 @@ static int defer_packet_queue( static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode); static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, bool *stop); static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, - .insert = sdma_rb_insert, .evict = sdma_rb_evict, .remove = sdma_rb_remove, - .invalidate = sdma_rb_invalidate }; static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, @@ -247,14 +243,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, spin_unlock(&fd->pq_rcu_lock); synchronize_srcu(&fd->pq_srcu); /* at this point there can be no more new requests */ - if (pq->handler) - hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); @@ -1275,25 +1271,17 @@ static void free_system_node(struct sdma_mmu_node *node) kfree(node); } -static inline void acquire_node(struct sdma_mmu_node *node) -{ - atomic_inc(&node->refcount); - WARN_ON(atomic_read(&node->refcount) < 0); -} - -static inline void release_node(struct mmu_rb_handler *handler, - struct sdma_mmu_node *node) -{ - atomic_dec(&node->refcount); - WARN_ON(atomic_read(&node->refcount) < 0); -} - +/* + * kref_get()'s an additional kref on the returned rb_node to prevent rb_node + * from being released until after rb_node is assigned to an SDMA descriptor + * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the + * virtual address range for rb_node is invalidated between now and then. 
+ */ static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, unsigned long start, unsigned long end) { struct mmu_rb_node *rb_node; - struct sdma_mmu_node *node; unsigned long flags; spin_lock_irqsave(&handler->lock, flags); @@ -1302,11 +1290,12 @@ static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, spin_unlock_irqrestore(&handler->lock, flags); return NULL; } - node = container_of(rb_node, struct sdma_mmu_node, rb); - acquire_node(node); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&rb_node->refcount); spin_unlock_irqrestore(&handler->lock, flags); - return node; + return container_of(rb_node, struct sdma_mmu_node, rb); } static int pin_system_pages(struct user_sdma_request *req, @@ -1355,6 +1344,13 @@ retry: return 0; } +/* + * kref refcount on *node_p will be 2 on successful addition: one kref from + * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being + * released until after *node_p is assigned to an SDMA descriptor (struct + * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual + * address range for *node_p is invalidated between now and then. + */ static int add_system_pinning(struct user_sdma_request *req, struct sdma_mmu_node **node_p, unsigned long start, unsigned long len) @@ -1368,6 +1364,12 @@ static int add_system_pinning(struct user_sdma_request *req, if (!node) return -ENOMEM; + /* First kref "moves" to mmu_rb_handler */ + kref_init(&node->rb.refcount); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&node->rb.refcount); + node->pq = pq; ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); if (ret == 0) { @@ -1431,15 +1433,15 @@ static int get_system_cache_entry(struct user_sdma_request *req, return 0; } - SDMA_DBG(req, "prepend: node->rb.addr %lx, node->refcount %d", - node->rb.addr, atomic_read(&node->refcount)); + SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", + node->rb.addr, kref_read(&node->rb.refcount)); prepend_len = node->rb.addr - start; /* * This node will not be returned, instead a new node * will be. So release the reference. 
*/ - release_node(handler, node); + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); /* Prepend a node to cover the beginning of the allocation */ ret = add_system_pinning(req, node_p, start, prepend_len); @@ -1451,6 +1453,20 @@ static int get_system_cache_entry(struct user_sdma_request *req, } } +static void sdma_mmu_rb_node_get(void *ctx) +{ + struct mmu_rb_node *node = ctx; + + kref_get(&node->refcount); +} + +static void sdma_mmu_rb_node_put(void *ctx) +{ + struct sdma_mmu_node *node = ctx; + + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); +} + static int add_mapping_to_sdma_packet(struct user_sdma_request *req, struct user_sdma_txreq *tx, struct sdma_mmu_node *cache_entry, @@ -1494,9 +1510,12 @@ static int add_mapping_to_sdma_packet(struct user_sdma_request *req, ctx = cache_entry; } - ret = sdma_txadd_page(pq->dd, ctx, &tx->txreq, + ret = sdma_txadd_page(pq->dd, &tx->txreq, cache_entry->pages[page_index], - page_offset, from_this_page); + page_offset, from_this_page, + ctx, + sdma_mmu_rb_node_get, + sdma_mmu_rb_node_put); if (ret) { /* * When there's a failure, the entire request is freed by @@ -1518,8 +1537,6 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, struct user_sdma_iovec *iovec, size_t from_this_iovec) { - struct mmu_rb_handler *handler = req->pq->handler; - while (from_this_iovec > 0) { struct sdma_mmu_node *cache_entry; size_t from_this_cache_entry; @@ -1540,15 +1557,15 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, from_this_cache_entry); + + /* + * Done adding cache_entry to zero or more sdma_desc. Can + * kref_put() the "safety" kref taken under + * get_system_cache_entry(). + */ + kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); + if (ret) { - /* - * We're guaranteed that there will be no descriptor - * completion callback that releases this node - * because only the last descriptor referencing it - * has a context attached, and a failure means the - * last descriptor was never added. - */ - release_node(handler, cache_entry); SDMA_DBG(req, "add system segment failed %d", ret); return ret; } @@ -1599,42 +1616,12 @@ static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, return 0; } -void system_descriptor_complete(struct hfi1_devdata *dd, - struct sdma_desc *descp) -{ - switch (sdma_mapping_type(descp)) { - case SDMA_MAP_SINGLE: - dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp), - sdma_mapping_len(descp), DMA_TO_DEVICE); - break; - case SDMA_MAP_PAGE: - dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp), - sdma_mapping_len(descp), DMA_TO_DEVICE); - break; - } - - if (descp->pinning_ctx) { - struct sdma_mmu_node *node = descp->pinning_ctx; - - release_node(node->rb.handler, node); - } -} - static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len) { return (bool)(node->addr == addr); } -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - - atomic_inc(&node->refcount); - return 0; -} - /* * Return 1 to remove the node from the rb tree and call the remove op. * @@ -1647,10 +1634,6 @@ static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, container_of(mnode, struct sdma_mmu_node, rb); struct evict_data *evict_data = evict_arg; - /* is this node still being used? 
*/ - if (atomic_read(&node->refcount)) - return 0; /* keep this node */ - /* this node will be evicted, add its pages to our count */ evict_data->cleared += node->npages; @@ -1668,13 +1651,3 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) free_system_node(node); } - -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - - if (!atomic_read(&node->refcount)) - return 1; - return 0; -} diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index a241836371dc..548347d4c5bc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -104,7 +104,6 @@ struct hfi1_user_sdma_comp_q { struct sdma_mmu_node { struct mmu_rb_node rb; struct hfi1_user_sdma_pkt_q *pq; - atomic_t refcount; struct page **pages; unsigned int npages; }; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 727eedfba332..cc6324d2d1dd 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -64,11 +64,11 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, /* combine physically continuous fragments later? */ ret = sdma_txadd_page(sde->dd, - NULL, &tx->txreq, skb_frag_page(frag), skb_frag_off(frag), - skb_frag_size(frag)); + skb_frag_size(frag), + NULL, NULL, NULL); if (unlikely(ret)) goto bail_txadd; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index aa8a08d1c014..47c0efed1821 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -595,11 +595,12 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } /* Set HEM base address(128K/page, pa) to Hardware */ - if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) { + ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); + if (ret) { hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; - ret = -ENODEV; - dev_err(dev, "set HEM base address to HW failed.\n"); + dev_err(dev, "set HEM base address to HW failed, ret = %d.\n", + ret); goto out; } @@ -618,6 +619,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, u32 hop_num = mhop->hop_num; u32 chunk_ba_num; u32 step_idx; + int ret; index->inited = HEM_INDEX_BUF; chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; @@ -641,16 +643,24 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, else step_idx = hop_num; - if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); - - if (index->inited & HEM_INDEX_L1) - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); + ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx); + if (ret) + ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n", + hop_num, ret); + + if (index->inited & HEM_INDEX_L1) { + ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1); + if (ret) + ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n", + ret); + } - if (index->inited & HEM_INDEX_L0) - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); + if (index->inited & HEM_INDEX_L0) { + ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0); + if (ret) + ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n", + ret); + } } } @@ -687,6 +697,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, { 
struct device *dev = hr_dev->dev; unsigned long i; + int ret; if (hns_roce_check_whether_mhop(hr_dev, table->type)) { hns_roce_table_mhop_put(hr_dev, table, obj, 1); @@ -699,8 +710,10 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, &table->mutex)) return; - if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) - dev_warn(dev, "failed to clear HEM base address.\n"); + ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); + if (ret) + dev_warn(dev, "failed to clear HEM base address, ret = %d.\n", + ret); hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; @@ -916,6 +929,8 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; unsigned long i; + int obj; + int ret; if (hns_roce_check_whether_mhop(hr_dev, table->type)) { hns_roce_cleanup_mhop_hem_table(hr_dev, table); @@ -924,9 +939,11 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, for (i = 0; i < table->num_hem; ++i) if (table->hem[i]) { - if (hr_dev->hw->clear_hem(hr_dev, table, - i * table->table_chunk_size / table->obj_size, 0)) - dev_err(dev, "clear HEM base address failed.\n"); + obj = i * table->table_chunk_size / table->obj_size; + ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0); + if (ret) + dev_err(dev, "clear HEM base address failed, ret = %d.\n", + ret); hns_roce_free_hem(hr_dev, table->hem[i]); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d4c6b9bc0a4e..8f7eb11066b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -373,17 +373,10 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_qp *ibqp = &hr_qp->ibqp; - if (unlikely(ibqp->qp_type != IB_QPT_RC && - ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "not supported QP(0x%x)type!\n", - ibqp->qp_type); - return -EOPNOTSUPP; - } else if (unlikely(hr_qp->state == IB_QPS_RESET || - hr_qp->state == IB_QPS_INIT || - hr_qp->state == IB_QPS_RTR)) { + if (unlikely(hr_qp->state == IB_QPS_RESET || + hr_qp->state == IB_QPS_INIT || + hr_qp->state == IB_QPS_RTR)) { ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; @@ -771,17 +764,6 @@ out: static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_qp *ibqp = &hr_qp->ibqp; - - if (unlikely(ibqp->qp_type != IB_QPT_RC && - ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", - ibqp->qp_type); - return -EOPNOTSUPP; - } - if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 16183e894da7..dd428d915c17 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -93,16 +93,18 @@ static int irdma_nop_1(struct irdma_qp_uk *qp) */ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) { - __le64 *wqe; + struct irdma_qp_quanta *sq; u32 wqe_idx; if (!(qp_wqe_idx & 0x7F)) { wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; - wqe = qp->sq_base[wqe_idx].elem; + sq = qp->sq_base + wqe_idx; if (wqe_idx) - memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); + memset(sq, qp->swqe_polarity ? 0 : 0xFF, + 128 * sizeof(*sq)); else - memset(wqe, qp->swqe_polarity ? 
0xFF : 0, 0x1000); + memset(sq, qp->swqe_polarity ? 0xFF : 0, + 128 * sizeof(*sq)); } } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index eaa12c124598..9c4fe4fa9001 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4452,8 +4452,16 @@ static const struct ib_device_ops irdma_roce_dev_ops = { }; static const struct ib_device_ops irdma_iw_dev_ops = { - .modify_qp = irdma_modify_qp, .get_port_immutable = irdma_iw_port_immutable, + .iw_accept = irdma_accept, + .iw_add_ref = irdma_qp_add_ref, + .iw_connect = irdma_connect, + .iw_create_listen = irdma_create_listen, + .iw_destroy_listen = irdma_destroy_listen, + .iw_get_qp = irdma_get_qp, + .iw_reject = irdma_reject, + .iw_rem_ref = irdma_qp_rem_ref, + .modify_qp = irdma_modify_qp, .query_gid = irdma_query_gid, }; @@ -4517,50 +4525,35 @@ static void irdma_init_roce_device(struct irdma_device *iwdev) * irdma_init_iw_device - initialization of iwarp rdma device * @iwdev: irdma device */ -static int irdma_init_iw_device(struct irdma_device *iwdev) +static void irdma_init_iw_device(struct irdma_device *iwdev) { struct net_device *netdev = iwdev->netdev; iwdev->ibdev.node_type = RDMA_NODE_RNIC; addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, netdev->dev_addr); - iwdev->ibdev.ops.iw_add_ref = irdma_qp_add_ref; - iwdev->ibdev.ops.iw_rem_ref = irdma_qp_rem_ref; - iwdev->ibdev.ops.iw_get_qp = irdma_get_qp; - iwdev->ibdev.ops.iw_connect = irdma_connect; - iwdev->ibdev.ops.iw_accept = irdma_accept; - iwdev->ibdev.ops.iw_reject = irdma_reject; - iwdev->ibdev.ops.iw_create_listen = irdma_create_listen; - iwdev->ibdev.ops.iw_destroy_listen = irdma_destroy_listen; memcpy(iwdev->ibdev.iw_ifname, netdev->name, sizeof(iwdev->ibdev.iw_ifname)); ib_set_device_ops(&iwdev->ibdev, &irdma_iw_dev_ops); - - return 0; } /** * irdma_init_rdma_device - initialization of rdma device * @iwdev: irdma device */ -static int irdma_init_rdma_device(struct irdma_device *iwdev) +static void irdma_init_rdma_device(struct irdma_device *iwdev) { struct pci_dev *pcidev = iwdev->rf->pcidev; - int ret; - if (iwdev->roce_mode) { + if (iwdev->roce_mode) irdma_init_roce_device(iwdev); - } else { - ret = irdma_init_iw_device(iwdev); - if (ret) - return ret; - } + else + irdma_init_iw_device(iwdev); + iwdev->ibdev.phys_port_cnt = 1; iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count; iwdev->ibdev.dev.parent = &pcidev->dev; ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops); - - return 0; } /** @@ -4598,9 +4591,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev) { int ret; - ret = irdma_init_rdma_device(iwdev); - if (ret) - return ret; + irdma_init_rdma_device(iwdev); ret = ib_device_set_netdev(&iwdev->ibdev, iwdev->netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 54b61930a7fd..4b3b5b274e84 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -13,7 +13,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, u8 *rx_hash_key) { struct mana_port_context *mpc = netdev_priv(ndev); - struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; mana_handle_t *req_indir_tab; struct gdma_context *gc; @@ -33,6 +33,8 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, sizeof(resp)); + req->hdr.req.msg_version = GDMA_MESSAGE_V2; + req->vport = mpc->port_handle; 
req->rx_enable = 1; req->update_default_rxobj = 1; @@ -46,6 +48,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; req->indir_tab_offset = sizeof(*req); req->update_indir_tab = true; + req->cqe_coalescing_enable = 1; req_indir_tab = (mana_handle_t *)(req + 1); /* The ind table passed to the hardware must have diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2dfa6f49a6f4..9c33d960af3c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -25,6 +25,7 @@ #include <rdma/mlx5_user_ioctl_verbs.h> #include "srq.h" +#include "qp.h" #define mlx5_ib_dbg(_dev, format, arg...) \ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 77f9b4a54816..b6ee7c3ee1ca 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -6,7 +6,17 @@ #ifndef _MLX5_IB_QP_H #define _MLX5_IB_QP_H -#include "mlx5_ib.h" +struct mlx5_ib_dev; + +struct mlx5_qp_table { + struct notifier_block nb; + struct xarray dct_xa; + + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; int mlx5_init_qp_table(struct mlx5_ib_dev *dev); void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index bae0334d6e7f..d9cf6982d645 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -88,23 +88,35 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } +static int dct_event_notifier(struct mlx5_ib_dev *dev, struct mlx5_eqe *eqe) +{ + struct mlx5_core_dct *dct; + unsigned long flags; + u32 qpn; + + qpn = be32_to_cpu(eqe->data.dct.dctn) & 0xFFFFFF; + xa_lock_irqsave(&dev->qp_table.dct_xa, flags); + dct = xa_load(&dev->qp_table.dct_xa, qpn); + if (dct) + complete(&dct->drained); + xa_unlock_irqrestore(&dev->qp_table.dct_xa, flags); + return NOTIFY_OK; +} + static int rsc_event_notifier(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_ib_dev *dev = + container_of(nb, struct mlx5_ib_dev, qp_table.nb); struct mlx5_core_rsc_common *common; - struct mlx5_qp_table *table; - struct mlx5_core_dct *dct; + struct mlx5_eqe *eqe = data; u8 event_type = (u8)type; struct mlx5_core_qp *qp; - struct mlx5_eqe *eqe; u32 rsn; switch (event_type) { case MLX5_EVENT_TYPE_DCT_DRAINED: - eqe = data; - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - break; + return dct_event_notifier(dev, eqe); case MLX5_EVENT_TYPE_PATH_MIG: case MLX5_EVENT_TYPE_COMM_EST: case MLX5_EVENT_TYPE_SQ_DRAINED: @@ -113,7 +125,6 @@ static int rsc_event_notifier(struct notifier_block *nb, case MLX5_EVENT_TYPE_PATH_MIG_FAILED: case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - eqe = data; rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); break; @@ -121,8 +132,7 @@ static int rsc_event_notifier(struct notifier_block *nb, return NOTIFY_DONE; } - table = container_of(nb, struct mlx5_qp_table, nb); - common = mlx5_get_rsc(table, rsn); + common = mlx5_get_rsc(&dev->qp_table, rsn); if (!common) return NOTIFY_OK; @@ -137,11 +147,6 @@ static int rsc_event_notifier(struct notifier_block *nb, qp->event(qp, event_type); /* Need to put resource in event handler */ return NOTIFY_OK; - case MLX5_RES_DCT: - dct = (struct 
mlx5_core_dct *)common; - if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED) - complete(&dct->drained); - break; default: break; } @@ -188,28 +193,15 @@ static void destroy_resource_common(struct mlx5_ib_dev *dev, } static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, - struct mlx5_core_dct *dct, bool need_cleanup) + struct mlx5_core_dct *dct) { u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {}; struct mlx5_core_qp *qp = &dct->mqp; - int err; - err = mlx5_core_drain_dct(dev, dct); - if (err) { - if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - goto destroy; - - return err; - } - wait_for_completion(&dct->drained); -destroy: - if (need_cleanup) - destroy_resource_common(dev, &dct->mqp); MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); MLX5_SET(destroy_dct_in, in, uid, qp->uid); - err = mlx5_cmd_exec_in(dev->mdev, destroy_dct, in); - return err; + return mlx5_cmd_exec_in(dev->mdev, destroy_dct, in); } int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, @@ -227,13 +219,13 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, qp->qpn = MLX5_GET(create_dct_out, out, dctn); qp->uid = MLX5_GET(create_dct_in, in, uid); - err = create_resource_common(dev, qp, MLX5_RES_DCT); + err = xa_err(xa_store_irq(&dev->qp_table.dct_xa, qp->qpn, dct, GFP_KERNEL)); if (err) goto err_cmd; return 0; err_cmd: - _mlx5_core_destroy_dct(dev, dct, false); + _mlx5_core_destroy_dct(dev, dct); return err; } @@ -284,7 +276,31 @@ static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev, int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct) { - return _mlx5_core_destroy_dct(dev, dct, true); + struct mlx5_qp_table *table = &dev->qp_table; + struct mlx5_core_dct *tmp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto destroy; + + return err; + } + wait_for_completion(&dct->drained); + +destroy: + tmp = xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, dct, XA_ZERO_ENTRY, GFP_KERNEL); + if (WARN_ON(tmp != dct)) + return xa_err(tmp) ?: -EINVAL; + + err = _mlx5_core_destroy_dct(dev, dct); + if (err) { + xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0); + return err; + } + xa_erase_irq(&table->dct_xa, dct->mqp.qpn); + return 0; } int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) @@ -298,8 +314,7 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); MLX5_SET(destroy_qp_in, in, uid, qp->uid); - mlx5_cmd_exec_in(dev->mdev, destroy_qp, in); - return 0; + return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in); } int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, @@ -488,6 +503,7 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init(&table->dct_xa); mlx5_qp_debugfs_init(dev->mdev); table->nb.notifier_call = rsc_event_notifier; @@ -551,14 +567,14 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn) return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, in); } -static void destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid) +static int destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); MLX5_SET(destroy_rq_in, in, rqn, rqn); MLX5_SET(destroy_rq_in, 
in, uid, uid); - mlx5_cmd_exec_in(dev->mdev, destroy_rq, in); + return mlx5_cmd_exec_in(dev->mdev, destroy_rq, in); } int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, @@ -589,8 +605,7 @@ int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); - destroy_rq_tracked(dev, rq->qpn, rq->uid); - return 0; + return destroy_rq_tracked(dev, rq->qpn, rq->uid); } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index f83cd4a9d992..98b2a0090bf2 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -709,14 +709,6 @@ int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - if (unlikely(wr->opcode < 0)) { - dev_warn_ratelimited(&dev->pdev->dev, - "invalid send opcode\n"); - *bad_wr = wr; - ret = -EINVAL; - goto out; - } - /* * Only support UD, RC. * Need to check opcode table for thorough checking. diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 7a7e713de52d..54c723a6edda 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -212,10 +212,16 @@ static int __init rxe_module_init(void) { int err; - err = rxe_net_init(); + err = rxe_alloc_wq(); if (err) return err; + err = rxe_net_init(); + if (err) { + rxe_destroy_wq(); + return err; + } + rdma_link_register(&rxe_link_ops); pr_info("loaded\n"); return 0; @@ -226,6 +232,7 @@ static void __exit rxe_module_exit(void) rdma_link_unregister(&rxe_link_ops); ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); + rxe_destroy_wq(); pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index f46c5a5fd0ae..5111735aafae 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -832,7 +832,7 @@ int rxe_completer(struct rxe_qp *qp) } /* A non-zero return value will cause rxe_do_task to - * exit its loop and end the tasklet. A zero return + * exit its loop and end the work item. 
A zero return * will continue looping and return to rxe_completer */ done: diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 6ca2a05b6a2a..d5486cbb3f10 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -113,10 +113,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - if ((cq->notify == IB_CQ_NEXT_COMP) || - (cq->notify == IB_CQ_SOLICITED && solicited)) { + if ((cq->notify & IB_CQ_NEXT_COMP) || + (cq->notify & IB_CQ_SOLICITED && solicited)) { cq->notify = 0; - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 804b15e929dd..666e06a82bc9 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -31,8 +31,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); -void rxe_cq_disable(struct rxe_cq *cq); - void rxe_cq_cleanup(struct rxe_pool_elem *elem); /* rxe_mcast.c */ diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 0e538fafcc20..f54042e9aeb2 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -45,22 +45,17 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) } } -#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ - | IB_ACCESS_REMOTE_WRITE \ - | IB_ACCESS_REMOTE_ATOMIC) - static void rxe_mr_init(int access, struct rxe_mr *mr) { - u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1); - u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; + u32 key = mr->elem.index << 8 | rxe_get_next_key(-1); /* set ibmr->l/rkey and also copy into private l/rkey * for user MRs these will always be the same * for cases where caller 'owns' the key portion * they may be different until REG_MR WQE is executed. */ - mr->lkey = mr->ibmr.lkey = lkey; - mr->rkey = mr->ibmr.rkey = rkey; + mr->lkey = mr->ibmr.lkey = key; + mr->rkey = mr->ibmr.rkey = key; mr->access = access; mr->ibmr.page_size = PAGE_SIZE; @@ -195,7 +190,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) int err; /* always allow remote access for FMRs */ - rxe_mr_init(IB_ACCESS_REMOTE, mr); + rxe_mr_init(RXE_ACCESS_REMOTE, mr); err = rxe_mr_alloc(mr, max_pages); if (err) @@ -644,6 +639,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_mr *mr; + int remote; int ret; mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); @@ -653,9 +649,10 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) goto err; } - if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { + remote = mr->access & RXE_ACCESS_REMOTE; + if (remote ? (key != mr->rkey) : (key != mr->lkey)) { rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n", - key, (mr->rkey ? mr->rkey : mr->lkey)); + key, (remote ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } @@ -715,7 +712,7 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) mr->access = access; mr->lkey = key; - mr->rkey = (access & IB_ACCESS_REMOTE) ? 
key : 0; + mr->rkey = key; mr->ibmr.iova = wqe->wr.wr.reg.mr->iova; mr->state = RXE_MR_STATE_VALID; diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index afa5ce1a7116..d8a43d87de93 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -48,7 +48,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) } static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_mw *mw, struct rxe_mr *mr) + struct rxe_mw *mw, struct rxe_mr *mr, int access) { if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { @@ -58,7 +58,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } /* o10-36.2.2 */ - if (unlikely((mw->access & IB_ZERO_BASED))) { + if (unlikely((access & IB_ZERO_BASED))) { rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n"); return -EINVAL; } @@ -104,7 +104,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } /* C10-74 */ - if (unlikely((mw->access & + if (unlikely((access & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && !(mr->access & IB_ACCESS_LOCAL_WRITE))) { rxe_dbg_mw(mw, @@ -113,7 +113,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } /* C10-75 */ - if (mw->access & IB_ZERO_BASED) { + if (access & IB_ZERO_BASED) { if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { rxe_dbg_mw(mw, "attempt to bind a ZB MW outside of the MR\n"); @@ -133,12 +133,12 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_mw *mw, struct rxe_mr *mr) + struct rxe_mw *mw, struct rxe_mr *mr, int access) { u32 key = wqe->wr.wr.mw.rkey & 0xff; mw->rkey = (mw->rkey & ~0xff) | key; - mw->access = wqe->wr.wr.mw.access; + mw->access = access; mw->state = RXE_MW_STATE_VALID; mw->addr = wqe->wr.wr.mw.addr; mw->length = wqe->wr.wr.mw.length; @@ -169,6 +169,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) struct rxe_dev *rxe = to_rdev(qp->ibqp.device); u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; + int access = wqe->wr.wr.mw.access; mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); if (unlikely(!mw)) { @@ -196,13 +197,18 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) mr = NULL; } + if (access & ~RXE_ACCESS_SUPPORTED_MW) { + rxe_err_mw(mw, "access %#x not supported", access); + return -EOPNOTSUPP; + } + spin_lock_bh(&mw->lock); - ret = rxe_check_bind_mw(qp, wqe, mw, mr); + ret = rxe_check_bind_mw(qp, wqe, mw, mr, access); if (ret) goto err_unlock; - rxe_do_bind_mw(qp, wqe, mw, mr); + rxe_do_bind_mw(qp, wqe, mw, mr, access); err_unlock: spin_unlock_bh(&mw->lock); err_drop_mr: diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index cea4e0a63919..5686b691d6b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -91,6 +91,9 @@ enum rxe_hdr_mask { RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), + RXE_RDMA_OP_MASK = (RXE_READ_MASK | RXE_WRITE_MASK | + RXE_ATOMIC_WRITE_MASK | RXE_FLUSH_MASK | + RXE_ATOMIC_MASK), }; #define OPCODE_NONE (-1) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 7b41d79e72b2..d2f57ead78ad 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ 
b/drivers/infiniband/sw/rxe/rxe_param.h @@ -112,7 +112,7 @@ enum rxe_device_param { RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64, RXE_INFLIGHT_SKBS_PER_QP_LOW = 16, - /* Max number of interations of each tasklet + /* Max number of interations of each work item * before yielding the cpu to let other * work make progress */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a0f206431cf8..a569b111a9d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -392,6 +392,13 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; + if (mask & IB_QP_ACCESS_FLAGS) { + if (!(qp_type(qp) == IB_QPT_RC || qp_type(qp) == IB_QPT_UC)) + goto err1; + if (attr->qp_access_flags & ~RXE_ACCESS_SUPPORTED_QP) + goto err1; + } + if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr)) goto err1; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 5fe7cbae3031..2171f19494bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -857,7 +857,7 @@ int rxe_requester(struct rxe_qp *qp) update_state(qp, &pkt); /* A non-zero return value will cause rxe_do_task to - * exit its loop and end the tasklet. A zero return + * exit its loop and end the work item. A zero return * will continue looping and return to rxe_requester */ done: diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ee68306555b9..64c64f5f36a8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -387,7 +387,10 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, } } - return RESPST_CHK_RKEY; + if (pkt->mask & RXE_RDMA_OP_MASK) + return RESPST_CHK_RKEY; + else + return RESPST_EXECUTE; } /* if the reth length field is zero we can assume nothing @@ -434,6 +437,10 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access = 0; + /* parse RETH or ATMETH header for first/only packets + * for va, length, rkey, etc. or use current value for + * middle/last packets. + */ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (pkt->mask & RXE_RETH_MASK) qp_resp_from_reth(qp, pkt); @@ -454,7 +461,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp, qp_resp_from_atmeth(qp, pkt); access = IB_ACCESS_REMOTE_ATOMIC; } else { - return RESPST_EXECUTE; + /* shouldn't happen */ + WARN_ON(1); } /* A zero-byte read or write op is not required to @@ -1449,8 +1457,17 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify) struct rxe_recv_wqe *wqe; int err; - if (qp->srq) + if (qp->srq) { + if (notify && qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } return; + } while ((wqe = queue_head(q, q->type))) { if (notify) { @@ -1657,7 +1674,7 @@ int rxe_responder(struct rxe_qp *qp) } /* A non-zero return value will cause rxe_do_task to - * exit its loop and end the tasklet. A zero return + * exit its loop and end the work item. 
A zero return * will continue looping and return to rxe_responder */ done: diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index fb9a6bc8e620..1501120d4f52 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -6,8 +6,24 @@ #include "rxe.h" +static struct workqueue_struct *rxe_wq; + +int rxe_alloc_wq(void) +{ + rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE); + if (!rxe_wq) + return -ENOMEM; + + return 0; +} + +void rxe_destroy_wq(void) +{ + destroy_workqueue(rxe_wq); +} + /* Check if task is idle i.e. not running, not scheduled in - * tasklet queue and not draining. If so move to busy to + * work queue and not draining. If so move to busy to * reserve a slot in do_task() by setting to busy and taking * a qp reference to cover the gap from now until the task finishes. * state will move out of busy if task returns a non zero value @@ -21,9 +37,6 @@ static bool __reserve_if_idle(struct rxe_task *task) { WARN_ON(rxe_read(task->qp) <= 0); - if (task->tasklet.state & BIT(TASKLET_STATE_SCHED)) - return false; - if (task->state == TASK_STATE_IDLE) { rxe_get(task->qp); task->state = TASK_STATE_BUSY; @@ -38,7 +51,7 @@ static bool __reserve_if_idle(struct rxe_task *task) } /* check if task is idle or drained and not currently - * scheduled in the tasklet queue. This routine is + * scheduled in the work queue. This routine is * called by rxe_cleanup_task or rxe_disable_task to * see if the queue is empty. * Context: caller should hold task->lock. @@ -46,7 +59,7 @@ static bool __reserve_if_idle(struct rxe_task *task) */ static bool __is_done(struct rxe_task *task) { - if (task->tasklet.state & BIT(TASKLET_STATE_SCHED)) + if (work_pending(&task->work)) return false; if (task->state == TASK_STATE_IDLE || @@ -77,23 +90,23 @@ static bool is_done(struct rxe_task *task) * schedules the task. They must call __reserve_if_idle to * move the task to busy before calling or scheduling. * The task can also be moved to drained or invalid - * by calls to rxe-cleanup_task or rxe_disable_task. + * by calls to rxe_cleanup_task or rxe_disable_task. * In that case tasks which get here are not executed but * just flushed. The tasks are designed to look to see if - * there is work to do and do part of it before returning + * there is work to do and then do part of it before returning * here with a return value of zero until all the work - * has been consumed then it retuens a non-zero value. + * has been consumed then it returns a non-zero value. * The number of times the task can be run is limited by * max iterations so one task cannot hold the cpu forever. + * If the limit is hit and work remains the task is rescheduled. */ -static void do_task(struct tasklet_struct *t) +static void do_task(struct rxe_task *task) { - int cont; - int ret; - struct rxe_task *task = from_tasklet(task, t, tasklet); unsigned int iterations; unsigned long flags; int resched = 0; + int cont; + int ret; WARN_ON(rxe_read(task->qp) <= 0); @@ -115,25 +128,22 @@ static void do_task(struct tasklet_struct *t) } while (ret == 0 && iterations-- > 0); spin_lock_irqsave(&task->lock, flags); + /* we're not done yet but we ran out of iterations. 
+ * yield the cpu and reschedule the task + */ + if (!ret) { + task->state = TASK_STATE_IDLE; + resched = 1; + goto exit; + } + switch (task->state) { case TASK_STATE_BUSY: - if (ret) { - task->state = TASK_STATE_IDLE; - } else { - /* This can happen if the client - * can add work faster than the - * tasklet can finish it. - * Reschedule the tasklet and exit - * the loop to give up the cpu - */ - task->state = TASK_STATE_IDLE; - resched = 1; - } + task->state = TASK_STATE_IDLE; break; - /* someone tried to run the task since the last time we called - * func, so we will call one more time regardless of the - * return value + /* someone tried to schedule the task while we + * were running, keep going */ case TASK_STATE_ARMED: task->state = TASK_STATE_BUSY; @@ -141,22 +151,24 @@ static void do_task(struct tasklet_struct *t) break; case TASK_STATE_DRAINING: - if (ret) - task->state = TASK_STATE_DRAINED; - else - cont = 1; + task->state = TASK_STATE_DRAINED; break; default: WARN_ON(1); - rxe_info_qp(task->qp, "unexpected task state = %d", task->state); + rxe_dbg_qp(task->qp, "unexpected task state = %d", + task->state); + task->state = TASK_STATE_IDLE; } +exit: if (!cont) { task->num_done++; if (WARN_ON(task->num_done != task->num_sched)) - rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done", - task->num_sched, task->num_done); + rxe_dbg_qp( + task->qp, + "%ld tasks scheduled, %ld tasks done", + task->num_sched, task->num_done); } spin_unlock_irqrestore(&task->lock, flags); } while (cont); @@ -169,6 +181,12 @@ static void do_task(struct tasklet_struct *t) rxe_put(task->qp); } +/* wrapper around do_task to fix argument for work queue */ +static void do_work(struct work_struct *work) +{ + do_task(container_of(work, struct rxe_task, work)); +} + int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp, int (*func)(struct rxe_qp *)) { @@ -176,11 +194,9 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp, task->qp = qp; task->func = func; - - tasklet_setup(&task->tasklet, do_task); - task->state = TASK_STATE_IDLE; spin_lock_init(&task->lock); + INIT_WORK(&task->work, do_work); return 0; } @@ -213,8 +229,6 @@ void rxe_cleanup_task(struct rxe_task *task) while (!is_done(task)) cond_resched(); - tasklet_kill(&task->tasklet); - spin_lock_irqsave(&task->lock, flags); task->state = TASK_STATE_INVALID; spin_unlock_irqrestore(&task->lock, flags); @@ -226,7 +240,7 @@ void rxe_cleanup_task(struct rxe_task *task) void rxe_run_task(struct rxe_task *task) { unsigned long flags; - int run; + bool run; WARN_ON(rxe_read(task->qp) <= 0); @@ -235,11 +249,11 @@ void rxe_run_task(struct rxe_task *task) spin_unlock_irqrestore(&task->lock, flags); if (run) - do_task(&task->tasklet); + do_task(task); } -/* schedule the task to run later as a tasklet. - * the tasklet)schedule call can be called holding +/* schedule the task to run later as a work queue entry. + * the queue_work call can be called holding * the lock. 
*/ void rxe_sched_task(struct rxe_task *task) @@ -250,7 +264,7 @@ void rxe_sched_task(struct rxe_task *task) spin_lock_irqsave(&task->lock, flags); if (__reserve_if_idle(task)) - tasklet_schedule(&task->tasklet); + queue_work(rxe_wq, &task->work); spin_unlock_irqrestore(&task->lock, flags); } @@ -277,7 +291,9 @@ void rxe_disable_task(struct rxe_task *task) while (!is_done(task)) cond_resched(); - tasklet_disable(&task->tasklet); + spin_lock_irqsave(&task->lock, flags); + task->state = TASK_STATE_DRAINED; + spin_unlock_irqrestore(&task->lock, flags); } void rxe_enable_task(struct rxe_task *task) @@ -291,7 +307,7 @@ void rxe_enable_task(struct rxe_task *task) spin_unlock_irqrestore(&task->lock, flags); return; } + task->state = TASK_STATE_IDLE; - tasklet_enable(&task->tasklet); spin_unlock_irqrestore(&task->lock, flags); } diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index facb7c8e3729..a63e258b3d66 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -22,7 +22,7 @@ enum { * called again. */ struct rxe_task { - struct tasklet_struct tasklet; + struct work_struct work; int state; spinlock_t lock; struct rxe_qp *qp; @@ -32,6 +32,10 @@ struct rxe_task { long num_done; }; +int rxe_alloc_wq(void); + +void rxe_destroy_wq(void); + /* * init rxe_task structure * qp => parameter to pass to func diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4d8f6b8051ff..f4321a172000 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1182,9 +1182,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) unsigned long irq_flags; spin_lock_irqsave(&cq->cq_lock, irq_flags); - if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = flags & IB_CQ_SOLICITED_MASK; - + cq->notify |= flags & IB_CQ_SOLICITED_MASK; empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP); if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) @@ -1261,6 +1259,12 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start, struct rxe_mr *mr; int err, cleanup_err; + if (access & ~RXE_ACCESS_SUPPORTED_MR) { + rxe_err_pd(pd, "access = %#x not supported (%#x)", access, + RXE_ACCESS_SUPPORTED_MR); + return ERR_PTR(-EOPNOTSUPP); + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1294,6 +1298,40 @@ err_free: return ERR_PTR(err); } +static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags, + u64 start, u64 length, u64 iova, + int access, struct ib_pd *ibpd, + struct ib_udata *udata) +{ + struct rxe_mr *mr = to_rmr(ibmr); + struct rxe_pd *old_pd = to_rpd(ibmr->pd); + struct rxe_pd *pd = to_rpd(ibpd); + + /* for now only support the two easy cases: + * rereg_pd and rereg_access + */ + if (flags & ~RXE_MR_REREG_SUPPORTED) { + rxe_err_mr(mr, "flags = %#x not supported", flags); + return ERR_PTR(-EOPNOTSUPP); + } + + if (flags & IB_MR_REREG_PD) { + rxe_put(old_pd); + rxe_get(pd); + mr->ibmr.pd = ibpd; + } + + if (flags & IB_MR_REREG_ACCESS) { + if (access & ~RXE_ACCESS_SUPPORTED_MR) { + rxe_err_mr(mr, "access = %#x not supported", access); + return ERR_PTR(-EOPNOTSUPP); + } + mr->access = access; + } + + return NULL; +} + static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) { @@ -1446,6 +1484,7 @@ static const struct ib_device_ops rxe_dev_ops = { .query_srq = rxe_query_srq, .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, + .rereg_user_mr = 
rxe_rereg_user_mr, .resize_cq = rxe_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 26a20f088692..ccb9d19ffe8a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -253,6 +253,22 @@ struct rxe_qp { struct execute_work cleanup_work; }; +enum { + RXE_ACCESS_REMOTE = IB_ACCESS_REMOTE_READ + | IB_ACCESS_REMOTE_WRITE + | IB_ACCESS_REMOTE_ATOMIC, + RXE_ACCESS_SUPPORTED_MR = RXE_ACCESS_REMOTE + | IB_ACCESS_LOCAL_WRITE + | IB_ACCESS_MW_BIND + | IB_ACCESS_ON_DEMAND + | IB_ACCESS_FLUSH_GLOBAL + | IB_ACCESS_FLUSH_PERSISTENT + | IB_ACCESS_OPTIONAL, + RXE_ACCESS_SUPPORTED_QP = RXE_ACCESS_SUPPORTED_MR, + RXE_ACCESS_SUPPORTED_MW = RXE_ACCESS_SUPPORTED_MR + | IB_ZERO_BASED, +}; + enum rxe_mr_state { RXE_MR_STATE_INVALID, RXE_MR_STATE_FREE, @@ -269,6 +285,11 @@ enum rxe_mr_lookup_type { RXE_LOOKUP_REMOTE, }; +enum rxe_rereg { + RXE_MR_REREG_SUPPORTED = IB_MR_REREG_PD + | IB_MR_REREG_ACCESS, +}; + static inline int rkey_is_mw(u32 rkey) { u32 index = rkey >> 8; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index cfb50bfe53c3..b32941dd67cb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1710,7 +1710,6 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) return -ENOMEM; con->queue_num = cq_num; } - cq_num = max_send_wr + max_recv_wr; cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors; if (con->c.cid >= clt_path->s.irq_con_num) err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge, |
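The mlx5 qpc.c hunk above stops tracking DCTs in the shared resource tree and instead keeps them in an xarray keyed by DCT number, completing ->drained from the event notifier under the xarray lock and only erasing the entry once destroy succeeds. A reduced sketch of that lookup-and-complete pattern, assuming hypothetical demo_* names rather than the driver's types, is:

#include <linux/xarray.h>
#include <linux/completion.h>
#include <linux/types.h>

struct demo_dct {
	u32 qpn;
	struct completion drained;
};

struct demo_table {
	struct xarray dct_xa;
};

static void demo_table_init(struct demo_table *t)
{
	xa_init(&t->dct_xa);
}

static int demo_dct_insert(struct demo_table *t, struct demo_dct *dct)
{
	init_completion(&dct->drained);
	/* xa_store_irq() takes the xarray lock with interrupts disabled */
	return xa_err(xa_store_irq(&t->dct_xa, dct->qpn, dct, GFP_KERNEL));
}

/* Event path: look the DCT up under the xarray lock and signal it. */
static void demo_dct_drained(struct demo_table *t, u32 qpn)
{
	struct demo_dct *dct;
	unsigned long flags;

	xa_lock_irqsave(&t->dct_xa, flags);
	dct = xa_load(&t->dct_xa, qpn);
	if (dct)
		complete(&dct->drained);
	xa_unlock_irqrestore(&t->dct_xa, flags);
}

static void demo_dct_remove(struct demo_table *t, struct demo_dct *dct)
{
	wait_for_completion(&dct->drained);
	xa_erase_irq(&t->dct_xa, dct->qpn);
}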
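The rxe hunks above (rxe_mr.c, rxe_mw.c, rxe_qp.c, rxe_verbs.c/h) add explicit supported-access masks and reject any request carrying other bits with -EOPNOTSUPP. A minimal sketch of that whitelist check, assuming a hypothetical DEMO_ACCESS_SUPPORTED mask standing in for the RXE_ACCESS_SUPPORTED_* values, is:

#include <rdma/ib_verbs.h>
#include <linux/errno.h>

/* Illustrative supported mask; not the driver's actual definition. */
enum {
	DEMO_ACCESS_SUPPORTED = IB_ACCESS_LOCAL_WRITE
			      | IB_ACCESS_REMOTE_READ
			      | IB_ACCESS_REMOTE_WRITE
			      | IB_ACCESS_REMOTE_ATOMIC,
};

static int demo_check_access(int access)
{
	/* Any requested bit outside the supported mask fails fast. */
	if (access & ~DEMO_ACCESS_SUPPORTED)
		return -EOPNOTSUPP;

	return 0;
}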
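The rxe_task.c hunk above converts per-QP tasks from tasklets to an unbound workqueue, so the completer/requester/responder work now runs in process context. A self-contained sketch of the basic tasklet-to-workqueue pattern, assuming hypothetical demo_* names and a simplified loop (the real code also bounds iterations and manages task state under a spinlock), is:

#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/errno.h>

static struct workqueue_struct *demo_wq;

struct demo_task {
	struct work_struct work;
	int (*func)(struct demo_task *task);	/* returns nonzero when done */
};

static void demo_do_work(struct work_struct *work)
{
	struct demo_task *task = container_of(work, struct demo_task, work);

	/* Process context: may sleep, unlike a tasklet. */
	while (task->func(task) == 0)
		cond_resched();
}

static int demo_wq_init(void)
{
	demo_wq = alloc_workqueue("demo_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
	return demo_wq ? 0 : -ENOMEM;
}

static void demo_task_init(struct demo_task *task,
			   int (*func)(struct demo_task *task))
{
	task->func = func;
	INIT_WORK(&task->work, demo_do_work);
}

static void demo_task_schedule(struct demo_task *task)
{
	/* queue_work() may be called from atomic context, e.g. while
	 * holding a spinlock, matching how rxe_sched_task() is used.
	 */
	queue_work(demo_wq, &task->work);
}

static void demo_wq_destroy(void)
{
	destroy_workqueue(demo_wq);	/* waits for queued work to finish */
}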