diff options
Diffstat (limited to 'drivers/infiniband')
125 files changed, 2555 insertions, 2533 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 012156624b82..167e436ae11d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4271,8 +4271,8 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); - return sprintf(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); + return sysfs_emit(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a77750b8954d..c06c87a4dc5e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2495,8 +2495,9 @@ static int cma_listen_handler(struct rdma_cm_id *id, return id_priv->id.event_handler(id, event); } -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) +static int cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,21 +2505,21 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type, id_priv); if (IS_ERR(dev_id_priv)) - return; + return PTR_ERR(dev_id_priv); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = 
id_priv->afonly; @@ -2527,19 +2528,42 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) - dev_warn(&cma_dev->device->dev, - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); + goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + return 0; +err_listen: + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); + return ret; } -static void cma_listen_on_all(struct rdma_id_private *id_priv) +static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; + int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); + goto err_listen; + } + } mutex_unlock(&lock); + return 0; + +err_listen: + list_del(&id_priv->list); + mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); + return ret; } void rdma_set_service_type(struct rdma_cm_id *id, int tos) @@ -3692,8 +3716,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ret = -ENOSYS; goto err; } - } else - cma_listen_on_all(id_priv); + } else { + ret = cma_listen_on_all(id_priv); + if (ret) + goto err; + } return 0; err: @@ -4773,69 +4800,6 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static int cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - int ret; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return -ENOMEM; - - 
cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - ret = -ENOMEM; - goto free_cma_dev; - } - - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_roce_tos), - GFP_KERNEL); - if (!cma_dev->default_roce_tos) { - ret = -ENOMEM; - goto free_gid_type; - } - - rdma_for_each_port (device, i) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) - cma_dev->default_gid_type[i - rdma_start_port(device)] = - CMA_PREFERRED_ROCE_GID_TYPE; - else - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; - } - - init_completion(&cma_dev->comp); - refcount_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, &cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - trace_cm_add_one(device); - return 0; - -free_gid_type: - kfree(cma_dev->default_gid_type); - -free_cma_dev: - kfree(cma_dev); - return ret; -} - static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -4898,6 +4862,80 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } +static int cma_add_one(struct ib_device *device) +{ + struct rdma_id_private *to_destroy; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + int ret; + + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); + if (!cma_dev) + return -ENOMEM; + + cma_dev->device = device; + cma_dev->default_gid_type 
= kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; + goto free_cma_dev; + } + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; + goto free_gid_type; + } + + rdma_for_each_port (device, i) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; + } + + init_completion(&cma_dev->comp); + refcount_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) + goto free_listen; + } + mutex_unlock(&lock); + + trace_cm_add_one(device); + return 0; + +free_listen: + list_del(&cma_dev->list); + mutex_unlock(&lock); + + /* cma_process_remove() will delete to_destroy */ + cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); +free_gid_type: + kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + return ret; +} + static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7ec4af2ed87a..7f70e5a7de10 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -115,7 +115,7 @@ static ssize_t default_roce_mode_show(struct config_item *item, if (gid_type < 0) 
return gid_type; - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type)); + return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type)); } static ssize_t default_roce_mode_store(struct config_item *item, @@ -157,7 +157,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf) tos = cma_get_default_roce_tos(cma_dev, group->port_num); cma_configfs_params_put(cma_dev); - return sprintf(buf, "%u\n", tos); + return sysfs_emit(buf, "%u\n", tos); } static ssize_t default_roce_tos_store(struct config_item *item, diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index e84b0fedaacb..baa86c86efad 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); void nldev_init(void); void nldev_exit(void); -static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, - struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata, - struct ib_uqp_object *uobj) +static inline struct ib_qp * +_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; - bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -347,6 +344,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; + qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); spin_lock_init(&qp->mr_lock); @@ -354,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->sig_mrs); rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); - /* - * We don't track XRC QPs for now, because they don't have PD - * and more importantly they are created internaly by driver, - * see mlx5 create_dev_resources() as 
an example. - */ - is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; - if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - rdma_restrack_parent_name(&qp->res, &pd->res); - rdma_restrack_add(&qp->res); - } + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? NULL : caller); + rdma_restrack_add(&qp->res); return qp; } diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index e4ff0d3328b6..92745522250e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -64,8 +64,40 @@ out: return ret; } -static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, - enum rdma_nl_counter_mode mode) +static void auto_mode_init_counter(struct rdma_counter *counter, + const struct ib_qp *qp, + enum rdma_nl_counter_mask new_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; + counter->mode.mask = new_mask; + + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) + param->qp_type = qp->qp_type; +} + +static int __rdma_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + int ret; + + if (qp->counter) + return -EINVAL; + + if (!qp->device->ops.counter_bind_qp) + return -EOPNOTSUPP; + + mutex_lock(&counter->lock); + ret = qp->device->ops.counter_bind_qp(counter, qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, + struct ib_qp *qp, + enum rdma_nl_counter_mode mode) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; @@ -88,11 +120,22 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); - if (mode == RDMA_COUNTER_MODE_MANUAL) { + switch (mode) { + case RDMA_COUNTER_MODE_MANUAL: ret = __counter_set_mode(&port_counter->mode, 
RDMA_COUNTER_MODE_MANUAL, 0); - if (ret) + if (ret) { + mutex_unlock(&port_counter->lock); goto err_mode; + } + break; + case RDMA_COUNTER_MODE_AUTO: + auto_mode_init_counter(counter, qp, port_counter->mode.mask); + break; + default: + ret = -EOPNOTSUPP; + mutex_unlock(&port_counter->lock); + goto err_mode; } port_counter->num_counters++; @@ -102,10 +145,15 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, kref_init(&counter->kref); mutex_init(&counter->lock); + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) + goto err_mode; + + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); return counter; err_mode: - mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: rdma_restrack_put(&counter->res); @@ -132,19 +180,6 @@ static void rdma_counter_free(struct rdma_counter *counter) kfree(counter); } -static void auto_mode_init_counter(struct rdma_counter *counter, - const struct ib_qp *qp, - enum rdma_nl_counter_mask new_mask) -{ - struct auto_mode_param *param = &counter->mode.param; - - counter->mode.mode = RDMA_COUNTER_MODE_AUTO; - counter->mode.mask = new_mask; - - if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) - param->qp_type = qp->qp_type; -} - static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { @@ -161,24 +196,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, return match; } -static int __rdma_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - int ret; - - if (qp->counter) - return -EINVAL; - - if (!qp->device->ops.counter_bind_qp) - return -EOPNOTSUPP; - - mutex_lock(&counter->lock); - ret = qp->device->ops.counter_bind_qp(counter, qp); - mutex_unlock(&counter->lock); - - return ret; -} - static int __rdma_counter_unbind_qp(struct ib_qp *qp) { struct rdma_counter *counter = qp->counter; @@ -247,13 +264,6 @@ next: return counter; } -static void 
rdma_counter_res_add(struct rdma_counter *counter, - struct ib_qp *qp) -{ - rdma_restrack_parent_name(&counter->res, &qp->res); - rdma_restrack_add(&counter->res); -} - static void counter_release(struct kref *kref) { struct rdma_counter *counter; @@ -275,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) @@ -293,19 +303,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return ret; } } else { - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); if (!counter) return -ENOMEM; - - auto_mode_init_counter(counter, qp, port_counter->mode.mask); - - ret = __rdma_counter_bind_qp(counter, qp); - if (ret) { - rdma_counter_free(counter); - return ret; - } - - rdma_counter_res_add(counter, qp); } return 0; @@ -419,15 +419,6 @@ err: return NULL; } -static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, - struct ib_qp *qp) -{ - if ((counter->device != qp->device) || (counter->port != qp->port)) - return -EINVAL; - - return __rdma_counter_bind_qp(counter, qp); -} - static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { @@ -475,7 +466,12 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err_task; } - ret = rdma_counter_bind_qp_manual(counter, qp); + if ((counter->device != qp->device) || (counter->port != qp->port)) { + ret = -EINVAL; + goto err_task; + } + + ret = __rdma_counter_bind_qp(counter, qp); if (ret) goto err_task; @@ -520,26 +516,18 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, goto err; } - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); if (!counter) { ret = -ENOMEM; goto err; 
} - ret = rdma_counter_bind_qp_manual(counter, qp); - if (ret) - goto err_bind; - if (counter_id) *counter_id = counter->id; - rdma_counter_res_add(counter, qp); - rdma_restrack_put(&qp->res); - return ret; + return 0; -err_bind: - rdma_counter_free(counter); err: rdma_restrack_put(&qp->res); return ret; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a3b1fc84cdca..3ab1edea6acb 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -284,6 +284,7 @@ static void ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(reg_user_mr), IB_MANDATORY_FUNC(dereg_mr), IB_MANDATORY_FUNC(get_port_immutable) }; @@ -600,6 +601,37 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + device->uverbs_cmd_mask = + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | 
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); return device; } EXPORT_SYMBOL(_ib_alloc_device); @@ -1177,25 +1209,6 @@ out: return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1341,7 +1354,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. 
+ */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -2576,6 +2596,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); @@ -2675,6 +2696,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ffe11b03724c..c44079b9158e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); - if (ret) { - if (ib_is_destroy_retryable(ret, reason, uobj)) - return ret; - - /* Nothing to be done, dangle the memory and move on */ - WARN(true, - "ib_uverbs: failed to remove uobject id %d, driver err=%d", - uobj->id, ret); - } + if (ret) + /* Nothing to be done, wait till ucontext will clean it */ + return ret; uobj->object = NULL; } @@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, struct 
uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why, attrs); - /* - * We can only fail gracefully if the user requested to destroy the - * object or when a retry may be called upon an error. - * In the rest of the cases, just remove whatever you can. - */ - if (ib_is_destroy_retryable(ret, why, uobj)) + if (ret) return ret; if (why == RDMA_REMOVE_ABORT) @@ -581,11 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->destroy_object(uobj, why); - - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; + fd_type->destroy_object(uobj, why); return 0; } @@ -863,11 +849,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (reason == RDMA_REMOVE_DRIVER_FAILURE) + obj->object = NULL; if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); } + + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { + WARN_ON(!list_empty(&ufile->uobjects)); + return 0; + } return ret; } @@ -889,21 +882,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->cleanup_retryable = true; - while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ufile(ufile, reason)) { - /* - * No entry was cleaned-up successfully during this - * iteration. It is a driver bug to fail destruction. 
- */ - WARN_ON(!list_empty(&ufile->uobjects)); - break; - } - - ufile->ucontext->cleanup_retryable = false; - if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ufile(ufile, reason); + while (!list_empty(&ufile->uobjects) && + !__uverbs_cleanup_ufile(ufile, reason)) { + } + if (WARN_ON(!list_empty(&ufile->uobjects))) + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); ufile_destroy_ucontext(ufile, reason); done: diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 4aeeaaed0f17..e0a41c867002 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -221,19 +221,29 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; - int ret; + int ret = 0; if (!dev) return; + if (res->no_track) + goto out; + rt = &dev->res[res->type]; if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); - ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL); - res->id = ret ? 
0 : qp->qp_num; + WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8, + "QP number 0x%0X and port 0x%0X", qp->qp_num, + qp->port); + res->id = qp->qp_num; + if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI) + res->id |= qp->port << 24; + ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); + if (ret) + res->id = 0; } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; @@ -246,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) &rt->next_id, GFP_KERNEL); } +out: if (!ret) res->valid = true; } @@ -318,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) return; } + if (res->no_track) + goto out; + dev = res_to_dev(res); if (WARN_ON(!dev)) return; @@ -328,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) return; WARN_ON(old != res); - res->valid = false; +out: + res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b..a96030b784eb 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 914cddea525d..b8abb30f80df 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -165,9 +165,11 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, if (ret) 
return ret; - return sprintf(buf, "%d: %s\n", attr.state, - attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? - state_name[attr.state] : "UNKNOWN"); + return sysfs_emit(buf, "%d: %s\n", attr.state, + attr.state >= 0 && + attr.state < ARRAY_SIZE(state_name) ? + state_name[attr.state] : + "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, @@ -180,7 +182,7 @@ static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.lid); + return sysfs_emit(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, @@ -194,7 +196,7 @@ static ssize_t lid_mask_count_show(struct ib_port *p, if (ret) return ret; - return sprintf(buf, "%d\n", attr.lmc); + return sysfs_emit(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, @@ -207,7 +209,7 @@ static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.sm_lid); + return sysfs_emit(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, @@ -220,7 +222,7 @@ static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d\n", attr.sm_sl); + return sysfs_emit(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, @@ -233,7 +235,7 @@ static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%08x\n", attr.port_cap_flags); + return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, @@ -273,6 +275,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, speed = " HDR"; rate = 500; break; + case IB_SPEED_NDR: + speed = " NDR"; + rate = 1000; + break; case 
IB_SPEED_SDR: default: /* default to SDR for invalid rates */ speed = " SDR"; @@ -284,9 +290,9 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (rate < 0) return -EINVAL; - return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", - ib_width_enum_to_int(attr.active_width), speed); + return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, + rate % 10 ? ".5" : "", + ib_width_enum_to_int(attr.active_width), speed); } static const char *phys_state_to_str(enum ib_port_phys_state phys_state) @@ -318,21 +324,28 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.phys_state, - phys_state_to_str(attr.phys_state)); + return sysfs_emit(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { + const char *output; + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: - return sprintf(buf, "%s\n", "InfiniBand"); + output = "InfiniBand"; + break; case IB_LINK_LAYER_ETHERNET: - return sprintf(buf, "%s\n", "Ethernet"); + output = "Ethernet"; + break; default: - return sprintf(buf, "%s\n", "Unknown"); + output = "Unknown"; + break; } + + return sysfs_emit(buf, "%s\n", output); } static PORT_ATTR_RO(state); @@ -358,27 +371,28 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { struct net_device *ndev; - size_t ret = -EINVAL; + int ret = -EINVAL; rcu_read_lock(); ndev = rcu_dereference(gid_attr->ndev); if (ndev) - ret = sprintf(buf, "%s\n", ndev->name); + ret = sysfs_emit(buf, "%s\n", ndev->name); rcu_read_unlock(); return ret; } -static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t 
print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); + return sysfs_emit(buf, "%s\n", + ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr( struct ib_port *p, struct port_attribute *attr, char *buf, - size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) + ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); @@ -401,7 +415,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; - ssize_t ret; + int len; gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); if (IS_ERR(gid_attr)) { @@ -416,12 +430,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, * space throwing such error on fail to read gid, return zero * GID as before. This maintains backward compatibility. 
*/ - return sprintf(buf, "%pI6\n", zgid.raw); + return sysfs_emit(buf, "%pI6\n", zgid.raw); } - ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); rdma_put_gid_attr(gid_attr); - return ret; + return len; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, @@ -443,13 +457,13 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; - ssize_t ret; + int ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ @@ -521,8 +535,9 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; - ssize_t ret; + int ret; u8 data[8]; + int len; ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); @@ -531,30 +546,27 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, switch (width) { case 4: - ret = sprintf(buf, "%u\n", (*data >> - (4 - (offset % 8))) & 0xf); + len = sysfs_emit(buf, "%u\n", + (*data >> (4 - (offset % 8))) & 0xf); break; case 8: - ret = sprintf(buf, "%u\n", *data); + len = sysfs_emit(buf, "%u\n", *data); break; case 16: - ret = sprintf(buf, "%u\n", - be16_to_cpup((__be16 *)data)); + len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: - ret = sprintf(buf, "%u\n", - be32_to_cpup((__be32 *)data)); + len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: - ret = sprintf(buf, "%llu\n", - be64_to_cpup((__be64 *)data)); + len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); break; - default: - ret = 0; + len = 0; + 
break; } - return ret; + return len; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); @@ -815,12 +827,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, return 0; } -static ssize_t print_hw_stat(struct ib_device *dev, int port_num, - struct rdma_hw_stats *stats, int index, char *buf) +static int print_hw_stat(struct ib_device *dev, int port_num, + struct rdma_hw_stats *stats, int index, char *buf) { u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); - return sprintf(buf, "%llu\n", stats->value[index] + v); + return sysfs_emit(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, @@ -877,7 +889,7 @@ static ssize_t show_stats_lifespan(struct kobject *kobj, msecs = jiffies_to_msecs(stats->lifespan); mutex_unlock(&stats->lock); - return sprintf(buf, "%d\n", msecs); + return sysfs_emit(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct kobject *kobj, @@ -1224,21 +1236,34 @@ err_put: return ret; } +static const char *node_type_string(int node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + return "CA"; + case RDMA_NODE_IB_SWITCH: + return "switch"; + case RDMA_NODE_IB_ROUTER: + return "router"; + case RDMA_NODE_RNIC: + return "RNIC"; + case RDMA_NODE_USNIC: + return "usNIC"; + case RDMA_NODE_USNIC_UDP: + return "usNIC UDP"; + case RDMA_NODE_UNSPECIFIED: + return "unspecified"; + } + return "<unknown>"; +} + static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); - switch (dev->node_type) { - case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); - case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); - case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); - case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: 
unspecified\n", dev->node_type); - case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); - } + return sysfs_emit(buf, "%d: %s\n", dev->node_type, + node_type_string(dev->node_type)); } static DEVICE_ATTR_RO(node_type); @@ -1246,12 +1271,13 @@ static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(guid[0]), + be16_to_cpu(guid[1]), + be16_to_cpu(guid[2]), + be16_to_cpu(guid[3])); } static DEVICE_ATTR_RO(sys_image_guid); @@ -1259,12 +1285,13 @@ static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *node_guid = (__be16 *)&dev->node_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->node_guid)[0]), - be16_to_cpu(((__be16 *) &dev->node_guid)[1]), - be16_to_cpu(((__be16 *) &dev->node_guid)[2]), - be16_to_cpu(((__be16 *) &dev->node_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(node_guid[0]), + be16_to_cpu(node_guid[1]), + be16_to_cpu(node_guid[2]), + be16_to_cpu(node_guid[3])); } static DEVICE_ATTR_RO(node_guid); @@ -1273,7 +1300,7 @@ static ssize_t node_desc_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%.64s\n", dev->node_desc); + return sysfs_emit(buf, "%.64s\n", 
dev->node_desc); } static ssize_t node_desc_store(struct device *device, @@ -1300,10 +1327,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + char version[IB_FW_VERSION_NAME_MAX] = {}; + + ib_get_device_fw_str(dev, version); - ib_get_device_fw_str(dev, buf); - strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); - return strlen(buf); + return sysfs_emit(buf, "%s\n", version); } static DEVICE_ATTR_RO(fw_ver); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ffe2563ad345..7dab9a27a145 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1825,7 +1825,7 @@ static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e9fecbdf391b..7ca4112e3e8f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + if (umem->is_odp) { + unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); + + /* ODP must always be self consistent. */ + if (!(pgsz_bitmap & page_size)) + return 0; + return page_size; + } + /* rdma_for_each_block() has a bug if the page size is smaller than the * page size used to build the umem. For now prevent smaller page sizes * from being returned. 
@@ -220,10 +229,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; - sg = __sg_alloc_table_from_pages( - &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, - dma_get_max_seg_size(device->dma_device), sg, npages, - GFP_KERNEL); + sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret, + 0, ret << PAGE_SHIFT, + ib_dma_max_seg_size(device), sg, npages, + GFP_KERNEL); umem->sg_nents = umem->sg_head.nents; if (IS_ERR(sg)) { unpin_user_pages_dirty_lock(page_list, ret, 0); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b0d0b522cc76..19104a675691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1191,7 +1191,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev)); } static DEVICE_ATTR_RO(ibdev); @@ -1203,7 +1203,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%d\n", port->port_num); + return sysfs_emit(buf, "%d\n", port->port_num); } static DEVICE_ATTR_RO(port); @@ -1222,7 +1222,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode) static ssize_t abi_version_show(struct class *class, struct class_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } static CLASS_ATTR_RO(abi_version); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 418d133a8fb0..402d0b8bf58e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, return 0; ret = ib_dealloc_xrcd_user(xrcd, 
&attrs->driver_udata); - - if (ib_is_destroy_retryable(ret, why, uobject)) { + if (ret) { atomic_inc(&xrcd->usecnt); return ret; } @@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode) xrcd_table_delete(dev, inode); - return ret; + return 0; } static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) @@ -1401,8 +1400,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1906,8 +1905,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) if (ret) return ret; - if (cmd.base.attr_mask & - ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; return modify_qp(attrs, &cmd); @@ -1929,10 +1927,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. 
*/ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); - - if (cmd.base.attr_mask & - ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; ret = modify_qp(attrs, &cmd); @@ -3693,13 +3688,13 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_create_ah, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_create_ah, - struct ib_uverbs_create_ah_resp), - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + struct ib_uverbs_create_ah_resp)), DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_DESTROY_AH, ib_uverbs_destroy_ah, - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, @@ -3753,7 +3748,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { IB_USER_VERBS_EX_CMD_MODIFY_CQ, ib_uverbs_ex_modify_cq, UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_DEVICE, @@ -3999,8 +3994,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_CLOSE_XRCD, ib_uverbs_close_xrcd, - UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), - UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)), DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, ib_uverbs_open_qp, UAPI_DEF_WRITE_UDATA_IO( @@ -4010,8 +4004,9 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_open_xrcd, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_open_xrcd, - struct ib_uverbs_open_xrcd_resp), - UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + struct ib_uverbs_open_xrcd_resp)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), {}, }; diff --git a/drivers/infiniband/core/uverbs_main.c 
b/drivers/infiniband/core/uverbs_main.c index 4bb7c642f80c..f173ecd102dc 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1046,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); + ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; @@ -1065,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); + ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0658101fca00..13776a66e2e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return -EBUSY; ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; for (i = 0; i < table_size; i++) @@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, kfree(rwq_ind_tbl); kfree(ind_tbl); - return ret; + return 0; } static int uverbs_free_xrcd(struct ib_uobject *uobject, @@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&uxrcd->refcnt)) + return -EBUSY; 
mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); @@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; - int ret; - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&pd->usecnt)) + return -EBUSY; return ib_dealloc_pd_user(pd, &attrs->driver_udata); } @@ -157,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) spin_unlock_irq(&event_queue->lock); } -static int +static void uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -166,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, uobj); ib_uverbs_free_event_queue(&file->ev_queue); - return 0; } int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 61899eaf1f91..cc24cfdf7aee 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( return 0; } -static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct ib_uverbs_async_event_file *event_file = container_of(uobj, struct ib_uverbs_async_event_file, uobj); @@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, if (why == RDMA_REMOVE_DRIVER_REMOVE) ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, NULL, NULL); - return 0; } int uverbs_async_event_release(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c 
b/drivers/infiniband/core/uverbs_std_types_counters.c index b3c6c066b601..999da9c79866 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject, struct ib_counters *counters = uobject->object; int ret; - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&counters->usecnt)) + return -EBUSY; ret = counters->device->ops.destroy_counters(counters); if (ret) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 8dabd05988b2..370ad7c83f88 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_ucq( @@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, ev_queue) : NULL, ucq); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d5a1de33c2c9..98c522cf86d6 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; - int ret; - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&dm->usecnt)) + return -EBUSY; return dm->device->ops.dealloc_dm(dm, attrs); } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 459cf165b231..d42ed7ff223e 100644 --- 
a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; - int ret; - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&action->usecnt)) + return -EBUSY; return action->device->ops.destroy_flow_action(action); } diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 9b22bb553e8b..dc5856441729 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -33,6 +33,7 @@ #include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> +#include "restrack.h" static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( atomic_inc(&pd->usecnt); atomic_inc(&dm->usecnt); + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 3bf8dcdfe7eb..c00cfb5ed387 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); ib_uverbs_release_uevent(&uqp->uevent); - return ret; + return 0; } static int check_creation_flags(enum ib_qp_type qp_type, @@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( if (attr.qp_type == 
IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c index c0ecbba26bf4..e5513f828bdc 100644 --- a/drivers/infiniband/core/uverbs_std_types_srq.c +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (srq_type == IB_SRQT_XRC) { @@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } ib_uverbs_release_uevent(uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index f2e6a625724a..7ded8339346f 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq_user(wq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_uevent(&uwq->uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3..62f5bcb712cf 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi, method_elm->is_ex = def->write.is_ex; method_elm->handler = def->func_write; - if (def->write.is_ex) - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & - BIT_ULL(def->write.command_num)); - else + if 
(!def->write.is_ex) method_elm->disabled = !(ibdev->uverbs_cmd_mask & BIT_ULL(def->write.command_num)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b4..5d4c7c263665 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -516,7 +516,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!device->ops.create_ah) + if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); ah = rdma_zalloc_drv_obj_gfp( @@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; - ret = device->ops.create_ah(ah, &init_attr, udata); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { kfree(ah); return ERR_PTR(ret); @@ -1188,7 +1191,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, } /** - * ib_create_qp - Creates a kernel QP associated with the specified protection + * ib_create_named_qp - Creates a kernel QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the @@ -1197,8 +1200,9 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, * * NOTE: for user qp use ib_create_qp_user with valid udata! */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) { struct ib_device *device = pd ? 
pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1223,7 +1227,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; @@ -1289,7 +1293,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_named_qp); static const struct { int valid; @@ -1698,8 +1702,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cf3db9628397..401bdc9e931e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1271,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ - if (init_attr->create_flags) + if (init_attr->create_flags) { ibdev_dbg(&rdev->ibdev, "QP create flags 0x%x not supported", init_attr->create_flags); + return -EOPNOTSUPP; + } /* Setup CQs */ if (init_attr->send_cq) { @@ -1657,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . 
So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -1829,6 +1831,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, unsigned int flags; u8 nw_type; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp->qplib_qp.modify_flags = 0; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); @@ -2078,6 +2083,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; @@ -2827,6 +2833,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + if (attr->flags) + return -EOPNOTSUPP; + /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 04621ba8fa76..fdb8c2478258 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = 
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,35 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = 
ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 28349ed50885..44c2416588d4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1006,6 +1006,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) + return -EOPNOTSUPP; + + if (entries < 1 || entries > ibdev->attrs.max_cqe) return -EINVAL; if (vector >= rhp->rdev.lldi.nciq) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index a27899402f59..f85477f3b037 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -983,9 +983,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 42234df896fb..a2c71a1d93d5 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - struct c4iw_wr_wait *wr_waitp) -{ - *stag = T4_STAG_UNSET; - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL, wr_waitp); -} - -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) -{ - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 
0UL, 0, 0, 0, - 0, skb, wr_waitp); -} - static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr, struct c4iw_wr_wait *wr_waitp) @@ -611,74 +595,6 @@ err_free_mhp: return ERR_PTR(err); } -int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) -{ - struct c4iw_mw *mhp = to_c4iw_mw(ibmw); - struct c4iw_dev *rhp; - struct c4iw_pd *php; - u32 mmid; - u32 stag = 0; - int ret; - - if (ibmw->type != IB_MW_TYPE_1) - return -EINVAL; - - php = to_c4iw_pd(ibmw->pd); - rhp = php->rhp; - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) - return -ENOMEM; - - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - ret = -ENOMEM; - goto free_wr_wait; - } - - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); - if (ret) - goto free_skb; - - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = FW_RI_STAG_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - ibmw->rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - ret = -ENOMEM; - goto dealloc_win; - } - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return 0; - -dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); -free_skb: - kfree_skb(mhp->dereg_skb); -free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); - return ret; -} - -int c4iw_dealloc_mw(struct ib_mw *mw) -{ - struct c4iw_dev *rhp; - struct c4iw_mw *mhp; - u32 mmid; - - mhp = to_c4iw_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - xa_erase_irq(&rhp->mrs, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); - kfree_skb(mhp->dereg_skb); - c4iw_put_wr_wait(mhp->wr_waitp); - return 0; -} - struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8138c57a1e43..1f1f856f8715 100644 --- 
a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -322,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%d\n", - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static DEVICE_ATTR_RO(hw_rev); @@ -337,7 +338,7 @@ static ssize_t hca_type_show(struct device *dev, pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); + return sysfs_emit(buf, "%s\n", info.driver); } static DEVICE_ATTR_RO(hca_type); @@ -348,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, - c4iw_dev->rdev.lldi.pdev->device); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); } static DEVICE_ATTR_RO(board_id); @@ -456,13 +457,11 @@ static const struct ib_device_ops c4iw_dev_ops = { .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, - .alloc_mw = c4iw_alloc_mw, .alloc_pd = c4iw_allocate_pd, .alloc_ucontext = c4iw_alloc_ucontext, .create_cq = c4iw_create_cq, .create_qp = c4iw_create_qp, .create_srq = c4iw_create_srq, - .dealloc_mw = c4iw_dealloc_mw, .dealloc_pd = c4iw_deallocate_pd, .dealloc_ucontext = c4iw_dealloc_ucontext, .dereg_mr = c4iw_dereg_mr, @@ -533,28 +532,6 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) 
| - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f20379e4e2ec..a7401398cb34 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, pr_debug("ib_pd %p\n", pd); - if (attrs->qp_type != IB_QPT_RC) + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); @@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, pr_debug("ib_qp %p\n", ibqp); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; @@ -2680,6 +2683,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, int ret; int wr_len; + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + pr_debug("%s ib_pd %p\n", __func__, pd); php = to_c4iw_pd(pd); diff --git a/drivers/infiniband/hw/efa/efa_main.c 
b/drivers/infiniband/hw/efa/efa_main.c index 6faed3a81e08..cb2f2c647ee5 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -245,9 +245,9 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,27 +308,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 191e0843f090..2fe5708b2d9d 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -917,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + 
return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, @@ -1029,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 356518e17fa6..681bb4e918c9 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 074ec71772d2..5650130e68d4 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -151,7 +151,7 @@ struct hfi1_port_attr { static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) { - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? 
"on" : "off"); } static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, @@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); } static const struct sysfs_ops hfi1_sc2vl_ops = { @@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); struct hfi1_ibport *ibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); } static const struct sysfs_ops hfi1_sl2sc_ops = { @@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); } static const struct sysfs_ops hfi1_vl2mtu_ops = { @@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(board_id); @@ -528,7 +526,7 @@ static ssize_t 
boardversion_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -545,9 +543,9 @@ static ssize_t nctxts_show(struct device *device, * and a receive context, so returning the smaller of the two counts * give a more accurate picture of total contexts available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_user_contexts, - (u32)dd->sc_sizes[SC_USER].count)); + return sysfs_emit(buf, "%u\n", + min(dd->num_user_contexts, + (u32)dd->sc_sizes[SC_USER].count)); } static DEVICE_ATTR_RO(nctxts); @@ -559,7 +557,7 @@ static ssize_t nfreectxts_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -570,7 +568,8 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); + /* dd->serial is already newline terminated in chip.c */ + return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); @@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset); * Convert the reported temperature from an integer (reported in * units of 0.25C) to a floating point number. */ -#define temp2str(temp, buf, size, idx) \ - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ - ((temp) >> 2), ((temp) & 0x3) * 25) +#define temp_d(t) ((t) >> 2) +#define temp_f(t) (((t)&0x3) * 25u) /* * Dump tempsense values, in decimal, to ease shell-scripts. 
@@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device, int ret; ret = hfi1_tempsense_rd(dd, &temp); - if (!ret) { - int idx = 0; - - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); - idx += scnprintf(buf + idx, PAGE_SIZE - idx, - "%u %u %u\n", temp.triggers & 0x1, - temp.triggers & 0x2, temp.triggers & 0x4); - ret = idx; - } - return ret; + if (ret) + return ret; + + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", + temp_d(temp.curr), temp_f(temp.curr), + temp_d(temp.lo_lim), temp_f(temp.lo_lim), + temp_d(temp.hi_lim), temp_f(temp.hi_lim), + temp_d(temp.crit_lim), temp_f(temp.crit_lim), + temp.triggers & 0x1, + temp.triggers & 0x2, + temp.triggers & 0x4); } static DEVICE_ATTR_RO(tempsense); @@ -817,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) if (vl < 0) return vl; - return snprintf(buf, PAGE_SIZE, "%d\n", vl); + return sysfs_emit(buf, "%d\n", vl); } static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73d197e21730..92aa2a9b3b5a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } @@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + 
break; default: break; } + break; default: break; } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 75b06db60f7c..b09ef3335e96 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,13 +31,13 @@ */ #include <linux/platform_device.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 +#define VLAN_SL_MASK 7 +#define VLAN_SL_SHIFT 13 static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { @@ -58,47 +58,47 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; - - /* Get mac address */ - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + return -EOPNOTSUPP; ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); 
if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); + ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.tclass = grh->traffic_class; - return 0; + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ah->av.vlan_en = 0; + + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + if (ah->av.vlan_id < VLAN_N_VID) { + ah->av.vlan_en = 1; + ah->av.vlan_id |= (rdma_ah_get_sl(ah_attr) & VLAN_SL_MASK) << + VLAN_SL_SHIFT; + } + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a6b23dec1adc..dad2b9ba7b7a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - struct device *dev = hr_dev->dev; - u32 size = buf->size; - int i; + struct hns_roce_buf_list *trunks; + u32 i; - if (size == 0) + if (!buf) return; - buf->size = 0; + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); - if (hns_roce_buf_is_direct(buf)) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->npages; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - 
buf->page_list[i].map); - kfree(buf->page_list); - buf->page_list = NULL; + kfree(trunks); } + + kfree(buf); } -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - struct hns_roce_buf_list *buf_list; - struct device *dev = hr_dev->dev; - u32 page_size; - int i; + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? 
GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->page_shift = page_shift; page_size = 1 << buf->page_shift; - buf->npages = DIV_ROUND_UP(size, page_size); - - /* required size is not bigger than one trunk size */ - if (size <= max_direct) { - buf->page_list = NULL; - buf->direct.buf = dma_alloc_coherent(dev, size, - &buf->direct.map, - GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); - if (!buf_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; i++) { - buf_list[i].buf = dma_alloc_coherent(dev, page_size, - &buf_list[i].map, - GFP_KERNEL); - if (!buf_list[i].buf) - break; - } + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); + } - if (i != buf->npages && i > 0) { - while (i-- > 0) - dma_free_coherent(dev, page_size, - buf_list[i].buf, - buf_list[i].map); - kfree(buf_list); - return -ENOMEM; - } - buf->page_list = buf_list; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); } - buf->size = size; - return 0; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? 
i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff8cfeb..29469e15dfd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -53,6 +53,18 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l + +#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) |= \ + cpu_to_le32(BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ + }) + +#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 809b22aa5056..68f355fba425 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -251,6 +251,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, u32 cq_entries = attr->cqe; int ret; + if (attr->flags) + return -EOPNOTSUPP; + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", cq_entries, hr_dev->caps.max_cqes); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6d2acff69f98..a5c6bb073569 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ 
b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -117,6 +117,8 @@ #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +#define HNS_ROCE_QP_BANK_NUM 8 + /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth */ @@ -129,9 +131,10 @@ enum { SERV_TYPE_UD, }; -enum { +enum hns_roce_qp_caps { HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), + HNS_ROCE_QP_CAP_OWNER_DB = BIT(2), }; enum hns_roce_cq_flags { @@ -221,6 +224,8 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 @@ -265,9 +270,6 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) -/* The minimum page count for hardware access page directly. */ -#define HNS_HW_DIRECT_PAGE_COUNT 2 - struct hns_roce_uar { u64 pfn; unsigned long index; @@ -419,11 +421,26 @@ struct hns_roce_buf_list { dma_addr_t map; }; +/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. + * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. + */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - u32 size; + unsigned int trunk_shift; unsigned int page_shift; }; @@ -510,13 +527,22 @@ struct hns_roce_uar_table { struct hns_roce_bitmap bitmap; }; +struct hns_roce_bank { + struct ida ida; + u32 inuse; /* Number of IDs allocated */ + u32 min; /* Lowest ID to allocate. */ + u32 max; /* Highest ID to allocate. 
*/ + u32 next; /* Next ID to allocate. */ +}; + struct hns_roce_qp_table { - struct hns_roce_bitmap bitmap; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; + spinlock_t bank_lock; }; struct hns_roce_cq_table { @@ -547,7 +573,7 @@ struct hns_roce_av { u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; u16 vlan_id; - bool vlan_en; + u8 vlan_en; }; struct hns_roce_ah { @@ -766,7 +792,7 @@ struct hns_roce_caps { u32 max_rq_sg; u32 max_extend_sg; int num_qps; - int reserved_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; @@ -825,6 +851,7 @@ struct hns_roce_caps { u32 cqc_timer_bt_num; u32 mpt_bt_num; u32 sccc_bt_num; + u32 gmv_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -864,6 +891,11 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 gmv_entry_num; + u32 gmv_entry_sz; + u32 gmv_ba_pg_sz; + u32 gmv_buf_pg_sz; + u32 gmv_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; @@ -999,6 +1031,10 @@ struct hns_roce_dev { struct hns_roce_eq_table eq_table; struct hns_roce_hem_table qpc_timer_table; struct hns_roce_hem_table cqc_timer_table; + /* GMV is the memory area that the driver allocates for the hardware + * to store SGID, SMAC and VLAN information. 
+ */ + struct hns_roce_hem_table gmv_table; int cmd_mod; int loop_idc; @@ -1069,29 +1105,18 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) -{ - if (buf->page_list) - return false; - - return true; -} - static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - if (hns_roce_buf_is_direct(buf)) - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & ((1 << buf->page_shift) - 1)); + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); } static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) { - if (hns_roce_buf_is_direct(buf)) - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); - else - return buf->page_list[idx].map; + int offset = idx << buf->page_shift; + + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); } #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) @@ -1215,8 +1240,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags); int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7487cf3d2c37..5c302aecf050 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,6 +75,9 @@ bool 
hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; + case HEM_TYPE_GMV: + hop_num = hr_dev->caps.gmv_hop_num; + break; default: return false; } @@ -183,6 +186,14 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_GMV: + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; + mhop->hop_num = hr_dev->caps.gmv_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", type); @@ -1033,6 +1044,10 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + + if (hr_dev->caps.gmv_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb..c6bd98228a44 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { HEM_TYPE_SCCC, HEM_TYPE_QPC_TIMER, HEM_TYPE_CQC_TIMER, + HEM_TYPE_GMV, /* UNMAP HEM */ HEM_TYPE_MTT, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f4d8a32ed6d..f18380f827dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, 
u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -3261,6 +3256,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, @@ -4347,7 +4344,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4367,7 +4363,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0468028ffe39..8575ad7acce2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -422,19 +422,50 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, return 0; } +static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct hns_roce_ah *ah) +{ + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + 
V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); @@ -444,38 +475,13 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - - /* MAC 
loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SIGNALED)); - /* Set se attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + !!(wr->send_flags & IB_SEND_SOLICITED)); roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -488,36 +494,29 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, curr_idx & (qp->sge.sge_cnt - 1)); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? 
qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); - - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) + return ret; set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return 0; } @@ -591,9 +590,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 
1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -601,7 +597,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return ret; } @@ -686,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -1573,6 +1580,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) PF_RES_DATA_4_PF_SCCC_BT_NUM_M, PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, + PF_RES_DATA_5_PF_GMV_BT_NUM_M, + PF_RES_DATA_5_PF_GMV_BT_NUM_S); + return 0; } @@ -1896,6 +1907,15 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + 
caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / + caps->gmv_entry_sz); } } @@ -2122,6 +2142,14 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, @@ -2465,24 +2493,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, link_tbl->table.map); } -static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +static int get_hem_table(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - int qpc_count, cqc_count; - int ret, i; - - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); - return ret; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); - goto err_tpq_init_failed; - } + unsigned int qpc_count; + unsigned int cqc_count; + unsigned int gmv_count; + int ret; + int i; /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; @@ -2506,8 +2523,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) } } + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; + gmv_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); + if (ret) { + dev_err(hr_dev->dev, + "failed to get gmv table, ret = %d.\n", ret); + goto err_gmv_failed; + } + } 
+ return 0; +err_gmv_failed: + for (i = 0; i < gmv_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); + err_cqc_timer_failed: for (i = 0; i < cqc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); @@ -2516,6 +2548,34 @@ err_qpc_timer_failed: for (i = 0; i < qpc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + return ret; +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + ret = get_hem_table(hr_dev); + if (ret) + goto err_get_hem_table_failed; + + return 0; + +err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); err_tpq_init_failed: @@ -2637,14 +2697,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, - int gid_index, const union ib_gid *gid, - enum hns_roce_sgid_type sgid_type) +static void copy_gid(void *dest, const union ib_gid *gid) +{ +#define GID_SIZE 4 + const union ib_gid *src = gid; + __le32 (*p)[GID_SIZE] = dest; + int i; + + if (!gid) + src = &zgid; + + for (i = 0; i < GID_SIZE; i++) + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); +} + +static int config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_sgid_tb *sgid_tb = (struct hns_roce_cfg_sgid_tb *)desc.data; - u32 *p; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); @@ -2653,19 +2726,54 @@ static int hns_roce_config_sgid_table(struct 
hns_roce_dev *hr_dev, roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); - p = (u32 *)&gid->raw[0]; - sgid_tb->vf_sgid_l = cpu_to_le32(*p); + copy_gid(&sgid_tb->vf_sgid_l, gid); - p = (u32 *)&gid->raw[4]; - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + return hns_roce_cmq_send(hr_dev, &desc, 1); +} - p = (u32 *)&gid->raw[8]; - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); +static int config_gmv_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type, + const struct ib_gid_attr *attr) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_gmv_tb_a *tb_a = + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; + struct hns_roce_cfg_gmv_tb_b *tb_b = + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; - p = (u32 *)&gid->raw[0xc]; - sgid_tb->vf_sgid_h = cpu_to_le32(*p); + u16 vlan_id = VLAN_CFI_MASK; + u8 mac[ETH_ALEN] = {}; + int ret; - return hns_roce_cmq_send(hr_dev, &desc, 1); + if (gid) { + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); + if (ret) + return ret; + } + + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); + + copy_gid(&tb_a->vf_sgid_l, gid); + + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, + vlan_id < VLAN_CFI_MASK); + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, + CFG_GMV_TB_SGID_IDX_S, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 
port, @@ -2675,23 +2783,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; int ret; - if (!gid || !attr) - return -EINVAL; - - if (attr->gid_type == IB_GID_TYPE_ROCE) - sgid_type = GID_TYPE_FLAG_ROCE_V1; - - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - if (ipv6_addr_v4mapped((void *)gid)) - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; - else - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + if (gid) { + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { + sgid_type = GID_TYPE_FLAG_ROCE_V1; + } } - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); + else + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to configure sgid table, ret = %d!\n", + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", ret); return ret; @@ -3060,6 +3169,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE ? 
1 : 0); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3583,9 +3695,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, u32 hem_type, int step_idx) { struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_gmv_bt *gmv_bt = + (struct hns_roce_cfg_gmv_bt *)desc.data; int ret; int op; + if (hem_type == HEM_TYPE_GMV) { + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, + false); + + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + + 32)); + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); if (op < 0) return 0; @@ -3683,24 +3811,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; - case HEM_TYPE_SCCC: - case HEM_TYPE_QPC_TIMER: - case HEM_TYPE_CQC_TIMER: - break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + case HEM_TYPE_GMV: + return 0; default: dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", table->type); return 0; } - if (table->type == HEM_TYPE_SCCC || - table->type == HEM_TYPE_QPC_TIMER || - table->type == HEM_TYPE_CQC_TIMER) - return 0; - op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3854,6 +3978,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) + return; + + if 
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(&context->ext, QPCEX_STASH); } static void modify_qp_init_to_init(struct ib_qp *ibqp, @@ -4421,7 +4551,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan_id < VLAN_N_VID) { + /* Only HIP08 needs to set the vlan_en bits in QPC */ + if (vlan_id < VLAN_N_VID && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4758,6 +4890,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, unsigned long rq_flag = 0; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. If software need modify some fields in context, @@ -5026,7 +5161,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, unsigned long flags; int ret = 0; - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UD) && + hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); @@ -6193,6 +6330,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index be7f2fe1e883..fac85369d84f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,7 @@ #define HNS_ROCE_VF_SMAC_NUM 32 #define HNS_ROCE_VF_SGID_NUM 32 #define HNS_ROCE_VF_SL_NUM 8 +#define 
HNS_ROCE_VF_GMV_BT_NUM 256 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 @@ -89,6 +90,7 @@ #define HNS_ROCE_V2_SCCC_SZ 32 #define HNS_ROCE_V3_SCCC_SZ 64 +#define HNS_ROCE_V3_GMV_ENTRY_SZ 32 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE @@ -241,6 +243,8 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -263,23 +267,24 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; }; + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -356,6 +361,10 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) + +#define CQC_STASH CQC_FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl; @@ -457,68 +466,72 @@ enum hns_roce_v2_qp_state { 
HNS_ROCE_QP_NUM_ST }; +struct hns_roce_v2_qp_context_ex { + __le32 data[64]; +}; struct hns_roce_v2_qp_context { - __le32 byte_4_sqpn_tst; - __le32 wqe_sge_ba; - __le32 byte_12_sq_hop; - __le32 byte_16_buf_ba_pg_sz; - __le32 byte_20_smac_sgid_idx; - __le32 byte_24_mtu_tc; - __le32 byte_28_at_fl; - u8 dgid[GID_LEN_V2]; - __le32 dmac; - __le32 byte_52_udpspn_dmac; - __le32 byte_56_dqpn_err; - __le32 byte_60_qpst_tempid; - __le32 qkey_xrcd; - __le32 byte_68_rq_db; - __le32 rq_db_record_addr; - __le32 byte_76_srqn_op_en; - __le32 byte_80_rnr_rx_cqn; - __le32 byte_84_rq_ci_pi; - __le32 rq_cur_blk_addr; - __le32 byte_92_srq_info; - __le32 byte_96_rx_reqmsn; - __le32 rq_nxt_blk_addr; - __le32 byte_104_rq_sge; - __le32 byte_108_rx_reqepsn; - __le32 rq_rnr_timer; - __le32 rx_msg_len; - __le32 rx_rkey_pkt_info; - __le64 rx_va; - __le32 byte_132_trrl; - __le32 trrl_ba; - __le32 byte_140_raq; - __le32 byte_144_raq; - __le32 byte_148_raq; - __le32 byte_152_raq; - __le32 byte_156_raq; - __le32 byte_160_sq_ci_pi; - __le32 sq_cur_blk_addr; - __le32 byte_168_irrl_idx; - __le32 byte_172_sq_psn; - __le32 byte_176_msg_pktn; - __le32 sq_cur_sge_blk_addr; - __le32 byte_184_irrl_idx; - __le32 cur_sge_offset; - __le32 byte_192_ext_sge; - __le32 byte_196_sq_psn; - __le32 byte_200_sq_max; - __le32 irrl_ba; - __le32 byte_208_irrl; - __le32 byte_212_lsn; - __le32 sq_timer; - __le32 byte_220_retry_psn_msn; - __le32 byte_224_retry_msg; - __le32 rx_sq_cur_blk_addr; - __le32 byte_232_irrl_sge; - __le32 irrl_cur_sge_offset; - __le32 byte_240_irrl_tail; - __le32 byte_244_rnr_rxack; - __le32 byte_248_ack_psn; - __le32 byte_252_err_txcqn; - __le32 byte_256_sqflush_rqcqe; - __le32 ext[64]; + __le32 byte_4_sqpn_tst; + __le32 wqe_sge_ba; + __le32 byte_12_sq_hop; + __le32 byte_16_buf_ba_pg_sz; + __le32 byte_20_smac_sgid_idx; + __le32 byte_24_mtu_tc; + __le32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + __le32 dmac; + __le32 byte_52_udpspn_dmac; + __le32 byte_56_dqpn_err; + __le32 byte_60_qpst_tempid; + __le32 
qkey_xrcd; + __le32 byte_68_rq_db; + __le32 rq_db_record_addr; + __le32 byte_76_srqn_op_en; + __le32 byte_80_rnr_rx_cqn; + __le32 byte_84_rq_ci_pi; + __le32 rq_cur_blk_addr; + __le32 byte_92_srq_info; + __le32 byte_96_rx_reqmsn; + __le32 rq_nxt_blk_addr; + __le32 byte_104_rq_sge; + __le32 byte_108_rx_reqepsn; + __le32 rq_rnr_timer; + __le32 rx_msg_len; + __le32 rx_rkey_pkt_info; + __le64 rx_va; + __le32 byte_132_trrl; + __le32 trrl_ba; + __le32 byte_140_raq; + __le32 byte_144_raq; + __le32 byte_148_raq; + __le32 byte_152_raq; + __le32 byte_156_raq; + __le32 byte_160_sq_ci_pi; + __le32 sq_cur_blk_addr; + __le32 byte_168_irrl_idx; + __le32 byte_172_sq_psn; + __le32 byte_176_msg_pktn; + __le32 sq_cur_sge_blk_addr; + __le32 byte_184_irrl_idx; + __le32 cur_sge_offset; + __le32 byte_192_ext_sge; + __le32 byte_196_sq_psn; + __le32 byte_200_sq_max; + __le32 irrl_ba; + __le32 byte_208_irrl; + __le32 byte_212_lsn; + __le32 sq_timer; + __le32 byte_220_retry_psn_msn; + __le32 byte_224_retry_msg; + __le32 rx_sq_cur_blk_addr; + __le32 byte_232_irrl_sge; + __le32 irrl_cur_sge_offset; + __le32 byte_240_irrl_tail; + __le32 byte_244_rnr_rxack; + __le32 byte_248_ack_psn; + __le32 byte_252_err_txcqn; + __le32 byte_256_sqflush_rqcqe; + + struct hns_roce_v2_qp_context_ex ext; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -887,6 +900,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) + +#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) + #define V2_QP_RWE_S 1 /* rdma write enable */ #define V2_QP_RRE_S 2 /* rdma read enable */ #define V2_QP_ATE_S 3 /* rdma atomic enable */ @@ -1073,8 +1090,9 @@ struct hns_roce_v2_ud_send_wqe { __le32 byte_32; __le32 byte_36; __le32 byte_40; - __le32 dmac; - __le32 byte_48; + u8 dmac[ETH_ALEN]; + u8 sgid_index; + u8 smac_index; u8 dgid[GID_LEN_V2]; }; @@ -1117,37 +1135,10 @@ struct hns_roce_v2_ud_send_wqe { 
#define V2_UD_SEND_WQE_BYTE_40_SL_S 20 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) -#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 -#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 -#define V2_UD_SEND_WQE_DMAC_0_S 0 -#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_DMAC_1_S 8 -#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_DMAC_2_S 16 -#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_DMAC_3_S 24 -#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) - struct hns_roce_v2_rc_send_wqe { __le32 byte_4; __le32 msg_len; @@ -1334,7 +1325,7 @@ struct hns_roce_pf_res_b { __le32 sgid_idx_num; __le32 qid_idx_sl_num; __le32 sccc_bt_idx_num; - __le32 rsv; + __le32 gmv_idx_num; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1361,6 +1352,12 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) +#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 +#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) + struct hns_roce_pf_timer_res_a { __le32 rsv0; __le32 qpc_timer_bt_idx_num; @@ -1425,7 +1422,7 @@ struct hns_roce_vf_res_b { __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; __le32 vf_sccc_idx_num; - __le32 rsv1; + __le32 vf_gmv_idx_num; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1455,6 +1452,12 @@ struct hns_roce_vf_res_b { #define 
VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1577,6 +1580,46 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +struct hns_roce_cfg_gmv_bt { + __le32 gmv_ba_l; + __le32 gmv_ba_h; + __le32 gmv_bt_idx; + __le32 rsv[3]; +}; + +#define CFG_GMV_BA_H_S 0 +#define CFG_GMV_BA_H_M GENMASK(19, 0) + +struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_vlan; + __le32 resv; +}; + +#define CFG_GMV_TB_SGID_IDX_S 0 +#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) + +#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +#define CFG_GMV_TB_VF_VLAN_EN_S 2 + +#define CFG_GMV_TB_VF_VLAN_ID_S 16 +#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) + +struct hns_roce_cfg_gmv_tb_b { + __le32 vf_smac_l; + __le32 vf_smac_h; + __le32 table_idx_rsv; + __le32 resv[3]; +}; + +#define CFG_GMV_TB_SMAC_H_S 0 +#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 number_ports; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index afeffafc59f9..f01590d8c3cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -33,6 +33,7 @@ #include <linux/acpi.h> #include <linux/of_platform.h> #include <linux/module.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> @@ -61,7 +62,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) static int hns_roce_set_mac(struct hns_roce_dev 
*hr_dev, u8 port, u8 *addr) { u8 phy_port; - u32 i = 0; + u32 i; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; @@ -90,14 +94,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); return ret; } @@ -421,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, + .create_user_ah = hns_roce_create_ah, .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, @@ -491,36 +495,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_REG_MR) | - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - if 
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); - } /* MW */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); - } /* FRMR */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) @@ -528,12 +509,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } @@ -680,8 +655,25 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.gmv_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, + HEM_TYPE_GMV, + hr_dev->caps.gmv_entry_sz, + hr_dev->caps.gmv_entry_num, 1); + if (ret) { + dev_err(dev, + "failed to init gmv table memory, ret = %d\n", + ret); + goto err_unmap_cqc_timer; + } + } + return 0; +err_unmap_cqc_timer: + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); + err_unmap_qpc_timer: if (hr_dev->caps.qpc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f81a695e9af..87e2e6236c69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -695,15 +695,6 @@ static inline size_t mtr_bufs_size(struct 
hns_roce_buf_attr *attr) return size; } -static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - unsigned int page_shift) -{ - if (is_direct) - return ALIGN(alloc_size, 1 << page_shift); - else - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; -} - /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. @@ -732,7 +723,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); - kfree(mtr->kmem); mtr->kmem = NULL; } } @@ -744,13 +734,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int best_pg_shift; int all_pg_count = 0; - size_t direct_size; size_t total_size; int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { - ibdev_err(ibdev, "Failed to check mtr size\n"); + ibdev_err(ibdev, "failed to check mtr size\n."); return -EINVAL; } @@ -762,7 +751,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; } @@ -780,19 +769,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, ret = 0; } else { mtr->umem = NULL; - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); - if (!mtr->kmem) { - ibdev_err(ibdev, "Failed to alloc kmem\n"); - return -ENOMEM; - } - direct_size = mtr_kmem_direct_size(is_direct, total_size, - buf_attr->page_shift); - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, buf_attr->page_shift); - if (ret) { - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); - goto err_alloc_mem; + mtr->kmem = + hns_roce_buf_alloc(hr_dev, total_size, + 
buf_attr->page_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR(mtr->kmem)) { + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", + PTR_ERR(mtr->kmem)); + return PTR_ERR(mtr->kmem); } + best_pg_shift = buf_attr->page_shift; all_pg_count = mtr->kmem->npages; } @@ -800,7 +786,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* must bigger than minimum hardware page shift */ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + ibdev_err(ibdev, + "failed to check mtr, page shift = %u count = %d.\n", best_pg_shift, all_pg_count); goto err_alloc_mem; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6c081dd985fc..62da30a1575a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -154,9 +154,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, + unsigned long *qpn) +{ + int id; + + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); + if (id < 0) { + id = ida_alloc_range(&bank->ida, bank->min, bank->max, + GFP_KERNEL); + if (id < 0) + return id; + } + + /* the QPN should keep increasing until the max value is reached. */ + bank->next = (id + 1) > bank->max ? 
bank->min : id + 1; + + /* the lower 3 bits is bankid */ + *qpn = (id << 3) | bankid; + + return 0; +} static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; + u8 bankid; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { @@ -169,13 +210,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, - 1, 1, &num); + spin_lock(&qp_table->bank_lock); + bankid = get_least_load_bankid_for_qp(qp_table->bank); + + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, + &num); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); - return -ENOMEM; + ibdev_err(&hr_dev->ib_dev, + "failed to alloc QPN, ret = %d\n", ret); + spin_unlock(&qp_table->bank_lock); + return ret; } + qp_table->bank[bankid].inuse++; + spin_unlock(&qp_table->bank_lock); + hr_qp->doorbell_qpn = (u32)num; } @@ -340,9 +389,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } +static inline u8 get_qp_bankid(unsigned long qpn) +{ + /* The lower 3 bits of QPN are used to hash to different banks */ + return (u8)(qpn & GENMASK(2, 0)); +} + static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + u8 bankid; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) return; @@ -350,7 +405,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); + bankid = get_qp_bankid(hr_qp->qpn); + + ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + + spin_lock(&hr_dev->qp_table.bank_lock); + hr_dev->qp_table.bank[bankid].inuse--; + 
spin_unlock(&hr_dev->qp_table.bank_lock); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -725,6 +786,9 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; + if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, @@ -869,17 +933,6 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (ret) ibdev_err(ibdev, "Failed to set user SQ size\n"); } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - ibdev_err(ibdev, "Failed to check multicast loopback\n"); - return -EINVAL; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); - return -EINVAL; - } - ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "Failed to set kernel SQ size\n"); @@ -906,6 +959,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + if (init_attr->create_flags) + return -EOPNOTSUPP; + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { ibdev_err(ibdev, "Failed to set QP param\n"); @@ -1003,6 +1059,30 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, kfree(hr_qp); } +static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, + bool is_user) +{ + switch (type) { + case IB_QPT_UD: + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + is_user) + goto out; + fallthrough; + case IB_QPT_RC: + case IB_QPT_GSI: + break; + default: + goto out; + } + + return 0; + +out: + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); + + return -EOPNOTSUPP; +} + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct 
ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1012,15 +1092,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct hns_roce_qp *hr_qp; int ret; - switch (init_attr->qp_type) { - case IB_QPT_RC: - case IB_QPT_GSI: - break; - default: - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); + if (ret) + return ERR_PTR(ret); hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); if (!hr_qp) @@ -1035,10 +1109,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, if (ret) { ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); - ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); return ERR_PTR(ret); } + return &hr_qp->ibqp; } @@ -1280,22 +1355,24 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - int reserved_from_top = 0; - int reserved_from_bot; - int ret; + unsigned int reserved_from_bot; + unsigned int i; mutex_init(&qp_table->scc_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, - hr_dev->caps.num_qps - 1, reserved_from_bot, - reserved_from_top); - if (ret) { - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", - ret); - return ret; + for (i = 0; i < reserved_from_bot; i++) { + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + ida_init(&hr_dev->qp_table.bank[i].ida); + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / + HNS_ROCE_QP_BANK_NUM - 1; + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; } return 0; @@ -1303,5 +1380,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) { - 
hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) + ida_destroy(&hr_dev->qp_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8caf74e44efd..27646b9e35df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -288,6 +288,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, int ret; u32 cqn; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Check the actual SRQ wqe and SRQ sge num */ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 86d3f8aff329..7ed9826221c1 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -5098,7 +5098,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) i40iw_hw_stats_stop_timer(vsi); } -static struct i40iw_cqp_ops iw_cqp_ops = { +static const struct i40iw_cqp_ops iw_cqp_ops = { .cqp_init = i40iw_sc_cqp_init, .cqp_create = i40iw_sc_cqp_create, .cqp_post_sq = i40iw_sc_cqp_post_sq, @@ -5107,7 +5107,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = { .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done }; -static struct i40iw_ccq_ops iw_ccq_ops = { +static const struct i40iw_ccq_ops iw_ccq_ops = { .ccq_init = i40iw_sc_ccq_init, .ccq_create = i40iw_sc_ccq_create, .ccq_destroy = i40iw_sc_ccq_destroy, @@ -5116,7 +5116,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = { .ccq_arm = i40iw_sc_ccq_arm }; -static struct i40iw_ceq_ops iw_ceq_ops = { +static const struct i40iw_ceq_ops iw_ceq_ops = { .ceq_init = i40iw_sc_ceq_init, .ceq_create = i40iw_sc_ceq_create, .cceq_create_done = i40iw_sc_cceq_create_done, @@ -5126,7 +5126,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = { .process_ceq = i40iw_sc_process_ceq }; 
-static struct i40iw_aeq_ops iw_aeq_ops = { +static const struct i40iw_aeq_ops iw_aeq_ops = { .aeq_init = i40iw_sc_aeq_init, .aeq_create = i40iw_sc_aeq_create, .aeq_destroy = i40iw_sc_aeq_destroy, @@ -5137,11 +5137,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = { }; /* iwarp pd ops */ -static struct i40iw_pd_ops iw_pd_ops = { +static const struct i40iw_pd_ops iw_pd_ops = { .pd_init = i40iw_sc_pd_init, }; -static struct i40iw_priv_qp_ops iw_priv_qp_ops = { +static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { .qp_init = i40iw_sc_qp_init, .qp_create = i40iw_sc_qp_create, .qp_modify = i40iw_sc_qp_modify, @@ -5156,14 +5156,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = { .iw_mr_fast_register = i40iw_sc_mr_fast_register }; -static struct i40iw_priv_cq_ops iw_priv_cq_ops = { +static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { .cq_init = i40iw_sc_cq_init, .cq_create = i40iw_sc_cq_create, .cq_destroy = i40iw_sc_cq_destroy, .cq_modify = i40iw_sc_cq_modify, }; -static struct i40iw_mr_ops iw_mr_ops = { +static const struct i40iw_mr_ops iw_mr_ops = { .alloc_stag = i40iw_sc_alloc_stag, .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, .mr_reg_shared = i40iw_sc_mr_reg_shared, @@ -5172,7 +5172,7 @@ static struct i40iw_mr_ops iw_mr_ops = { .mw_alloc = i40iw_sc_mw_alloc }; -static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { +static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .manage_push_page = i40iw_sc_manage_push_page, .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, @@ -5195,7 +5195,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .update_resume_qp = i40iw_sc_resume_qp }; -static struct i40iw_hmc_ops iw_hmc_ops = { +static const struct i40iw_hmc_ops iw_hmc_ops = { .init_iw_hmc = i40iw_sc_init_iw_hmc, .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, .configure_iw_fpm = i40iw_sc_configure_iw_fpm, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h 
b/drivers/infiniband/hw/i40iw/i40iw_type.h index c3babf3cbb8e..1dbf3991cc54 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -493,16 +493,16 @@ struct i40iw_sc_dev { struct i40iw_sc_aeq *aeq; struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; struct i40iw_sc_cq *ccq; - struct i40iw_cqp_ops *cqp_ops; - struct i40iw_ccq_ops *ccq_ops; - struct i40iw_ceq_ops *ceq_ops; - struct i40iw_aeq_ops *aeq_ops; - struct i40iw_pd_ops *iw_pd_ops; - struct i40iw_priv_qp_ops *iw_priv_qp_ops; - struct i40iw_priv_cq_ops *iw_priv_cq_ops; - struct i40iw_mr_ops *mr_ops; - struct i40iw_cqp_misc_ops *cqp_misc_ops; - struct i40iw_hmc_ops *hmc_ops; + const struct i40iw_cqp_ops *cqp_ops; + const struct i40iw_ccq_ops *ccq_ops; + const struct i40iw_ceq_ops *ceq_ops; + const struct i40iw_aeq_ops *aeq_ops; + const struct i40iw_pd_ops *iw_pd_ops; + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; + const struct i40iw_mr_ops *mr_ops; + const struct i40iw_cqp_misc_ops *cqp_misc_ops; + const struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 533f3caecb7a..aab69c6ddd7c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -533,7 +533,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENODEV); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; @@ -832,6 +832,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, u32 err; unsigned long flags; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&info, 0, sizeof(info)); ctx_info = &iwqp->ctx_info; iwarp_info = 
&iwqp->iwarp_info; @@ -1081,6 +1084,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq, int err_code; int entries = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (iwdev->closing) return -ENODEV; @@ -2033,7 +2039,7 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; - return sprintf(buf, "%x\n", hw_rev); + return sysfs_emit(buf, "%x\n", hw_rev); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,7 +2049,7 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "I40IW\n"); + return sysfs_emit(buf, "I40IW\n"); } static DEVICE_ATTR_RO(hca_type); @@ -2053,7 +2059,7 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); } static DEVICE_ATTR_RO(board_id); @@ -2661,27 +2667,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << 
IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8bd16474708f..f3ace85552f3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1523,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc return; } else *slave_id = slave; + break; default: /* nothing */; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cd0fba6b0964..f0864f40ea1a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2024,7 +2024,8 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); + + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2033,7 +2034,8 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->dev->rev_id); + + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,8 +2045,7 @@ static ssize_t board_id_show(struct device *device, struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, - dev->dev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -2657,73 +2658,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; 
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & 
MLX4_DEV_CAP_FLAG_MEM_WINDOW || - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 5e4ec9786081..33f525b744f2 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, } static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; - char pending_str[40]; char state_str[40]; - ssize_t len = 0; - int f; + char pending_str[40]; + int len; + int i; + u32 hoplimit; if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); + scnprintf(state_str, sizeof(state_str), "%s", + get_state_string(group->state)); else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", + get_state_string(group->state), + be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); + 
scnprintf(pending_str, sizeof(pending_str), "No"); } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); + + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], + group->members[1], + group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + + for (i = 0; i < MAX_VFS; i++) { + if (group->func[i].state == MCAST_MEMBER) + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, + group->func[i].join_state); + } + + hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); + len += sysfs_emit_at(buf, len, + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", + be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + (group->rec.mtusel_mtu & 
0x3f), + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + (group->rec.ratesel_rate & 0x3f), + (hoplimit & 0xf0000000) >> 28, + (hoplimit & 0x0fffff00) >> 8, + (hoplimit & 0x000000ff), + group->rec.proxy_join); return len; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5cb8e602294c..651785bd57f2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1493,7 +1493,7 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return -EINVAL; + return -EOPNOTSUPP; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) @@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, if (err) return err; + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); + qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? 
sqpn : 1; @@ -2787,6 +2792,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { @@ -4007,7 +4015,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index bf618529e734..6a381751c0d8 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, int err; int i; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index ea1f3a081b05..1b5891130aab 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev, mlx4_ib_iov_dentry->entry_num, port->num); - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); } /* store_admin_alias_guid stores the (new) administratively assigned value of that 
GUID. @@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev, struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; - ssize_t ret; + int ret; + __be16 *raw; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); - return ret; + + raw = (__be16 *)gid.raw; + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(raw[0]), + be16_to_cpu(raw[1]), + be16_to_cpu(raw[2]), + be16_to_cpu(raw[3]), + be16_to_cpu(raw[4]), + be16_to_cpu(raw[5]), + be16_to_cpu(raw[6]), + be16_to_cpu(raw[7])); } static ssize_t show_phys_port_pkey(struct device *dev, @@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev, if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ @@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; + struct pkey_mgt *m = &p->dev->pkeys; + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; + + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) 
+ return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%d\n", key); } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, @@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", p->slave); + return sysfs_emit(buf, "%d\n", p->slave); } static struct attribute ** @@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, smi_enabled); - ssize_t len = 0; - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_show_enable_smi_admin(struct device *dev, @@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, enable_smi_admin); - ssize_t len = 0; - - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_store_enable_smi_admin(struct device *dev, diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fb62f1d04afa..eb92cefffd77 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; 
@@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, - int *page_shift, int *cqe_size) + int entries, struct ib_udata *udata, + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = 
ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; @@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << 
page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); @@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b826498..ad0173f62c0e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,9 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2080,50 +2075,55 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - - page_mask = (1 << obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct 
uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned int page_size; + __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. + */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; - __be64 *mtt; + if (IS_ERR(cmd->in)) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, 
(obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); @@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); @@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/drivers/infiniband/hw/mlx5/fs.c 
b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bca..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 246e3cbe0b2c..4a054ebeb520 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, 
"%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << 
IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= 
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & @@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -4855,30 +4812,17 @@ static struct mlx5_interface mlx5_ib_interface = { .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} - -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} - static int __init mlx5_ib_init(void) { int err; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } @@ -4893,8 +4837,7 @@ static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 
13de3d2edd34..844545064c9e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,161 +36,65 @@ #include "mlx5_ib.h" #include <linux/jiffies.h> -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem - * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages - * @order: log2 of the number of compound pages +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order) +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; + struct ib_block_iter biter; - if (order) - *order = 0; - - *ncont = 0; + rdma_umem_for_each_dma_block (umem, &biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) 
| + access_flags); + pas++; } - *shift = PAGE_SHIFT + m; - *count = i; } /* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) { - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } + const 
u64 page_offset_mask = (1UL << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); } -} -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) -{ - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_dma_blocks(umem, PAGE_SIZE), - pas, access_flags); -} -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. 
+ */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; } #define WR_ID_BF 0xBF diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b1f2b34e5955..718e59fce006 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,7 +40,73 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); +} + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisifies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisify the 
above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) enum { MLX5_IB_MMAP_OFFSET_START = 9, @@ -597,13 +663,12 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; - int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; @@ -1210,7 +1275,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); @@ -1230,15 +1294,8 @@ int mlx5_query_mad_ifc_port(struct ib_device 
*ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); @@ -1456,8 +1513,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258f..b6116f6d065d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -126,7 +133,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, u64 length) { - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= + if (!mr->cache_ent) + return false; + return 
((u64)1 << mr->cache_ent->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } @@ -172,7 +181,6 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; mr->dev = ent->dev; @@ -867,14 +875,11 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) +static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start, + u64 length, int access_flags) { struct ib_umem *u; - *umem = NULL; - if (access_flags & IB_ACCESS_ON_DEMAND) { struct ib_umem_odp *odp; @@ -883,39 +888,17 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, if (IS_ERR(odp)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - *page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); + return ERR_CAST(odp); } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); + return &odp->umem; } - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; + u = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(u)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); + return u; } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; + return u; } static void mlx5_ib_umr_done(struct ib_cq 
*cq, struct ib_wc *wc) @@ -974,15 +957,20 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + unsigned int page_size; + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); if (!ent) return ERR_PTR(-E2BIG); @@ -1001,9 +989,10 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = order_base_2(page_size); return mr; } @@ -1012,14 +1001,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. 
+ */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); +} + +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; - int size; + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1030,15 +1149,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, const int page_mask = page_align - 1; 
size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1046,63 +1167,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + 
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1110,50 +1189,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. 
- */ - memset(xlt + size_to_map, 0, size - size_to_map); - } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); + return err; +} -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); +/* + * Send the DMA list to the HW for a normal MR using UMR. 
+ */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = &dev->mdev->pdev->dev; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); return err; } @@ -1162,12 +1278,11 @@ free_xlt: * Else, the given ibmr will be used. 
*/ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) + struct ib_umem *umem, u64 iova, + int access_flags, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + unsigned int page_size; struct mlx5_ib_mr *mr; __be64 *pas; void *mkc; @@ -1176,16 +1291,23 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); + page_size = + mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1197,7 +1319,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } @@ -1206,20 +1328,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, populate ? 
pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, len, length); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1247,10 +1369,8 @@ err_1: } static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) + u64 length, int access_flags) { - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; @@ -1290,8 +1410,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1360,10 +1479,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; int err; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) @@ -1391,23 +1506,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; } - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); + umem = mr_umem_get(dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); if (xlt_with_umr) { - mr = 
alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); + mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags); if (IS_ERR(mr)) mr = NULL; } if (!mr) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !xlt_with_umr); + mr = reg_create(NULL, pd, umem, virt_addr, access_flags, + !xlt_with_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1419,7 +1531,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + set_mr_fields(dev, mr, length, access_flags); if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { /* @@ -1427,10 +1540,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1520,19 +1630,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; - int page_shift = 0; int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; u64 addr, len; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); - if (!mr->umem) return -EINVAL; @@ -1553,12 +1657,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * used. 
*/ flags |= IB_MR_REREG_TRANS; + atomic_sub(ib_umem_num_pages(mr->umem), + &dev->mdev->priv.reg_pages); ib_umem_release(mr->umem); - mr->umem = NULL; - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) + mr->umem = mr_umem_get(dev, addr, len, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + mr->umem = NULL; goto err; + } + atomic_add(ib_umem_num_pages(mr->umem), + &dev->mdev->priv.reg_pages); } if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, @@ -1575,9 +1684,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (err) goto err; - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); - + mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true); if (IS_ERR(mr)) { err = PTR_ERR(mr); mr = to_mmr(ib_mr); @@ -1599,8 +1706,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, upd_flags |= MLX5_IB_UPD_XLT_PD; if (flags & IB_MR_REREG_ACCESS) upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); + err = mlx5_ib_update_mr_pas(mr, upd_flags); } else { err = rereg_umr(pd, mr, access_flags, flags); } @@ -1609,7 +1715,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fields(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, len, access_flags); return 0; @@ -1627,6 +1733,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1639,9 +1747,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, 
mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1659,9 +1766,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } @@ -1691,7 +1799,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1700,14 +1807,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 600e056798c0..9d32f3173231 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); - - err = 
mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; - int npages; + unsigned long page_size = 0; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ncont; - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", + (unsigned long 
long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - int npages; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &ncont, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? 
to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); @@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - int npages; - int ncont = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, - &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) + return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, 
log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if 
(!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; @@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } @@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case 
IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? -EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, @@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e2f720eec1e1..fab6736e4d6a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ 
b/drivers/infiniband/hw/mlx5/srq.c @@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; - int page_shift; - int ncont; - u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; in->uid = (in->type != IB_SRQT_XRC) ? 
to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); @@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d..a7e3dc5564ac 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index db889ec3fd48..8b3385396599 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int 
create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + 
pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 119b2573c9a0..26c3408dcaca 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; 
default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 9dbbf4d16796..a445160de3e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c4d9cdc4ee97..1a3dd07f993b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -470,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, int err; if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); switch (init_attr->qp_type) { case IB_QPT_RC: @@ -612,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq, udata, struct mthca_ucontext, ibucontext); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return -EINVAL; @@ -961,29 +961,34 @@ static ssize_t hw_rev_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%x\n", dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); -static ssize_t hca_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *hca_type_string(int hca_type) { - struct mthca_dev *dev = - rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - - switch (dev->pdev->device) { + switch (hca_type) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sprintf(buf, "MT23108\n"); + return "MT23108"; case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + return "MT25208 (MT23108 compat mode)"; case 
PCI_DEVICE_ID_MELLANOX_ARBEL: - return sprintf(buf, "MT25208\n"); + return "MT25208"; case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sprintf(buf, "MT25204\n"); - default: - return sprintf(buf, "unknown\n"); + return "MT25204"; } + + return "unknown"; +} + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mthca_dev *dev = + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); } static DEVICE_ATTR_RO(hca_type); @@ -993,7 +998,7 @@ static ssize_t board_id_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -1158,36 +1163,12 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - 
dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - if (mthca_is_memfree(dev)) ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_srq_ops); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 08a2a7afafd3..07cfc0934b17 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, new_state; int err = -EINVAL; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (attr_mask & IB_QP_CUR_STATE) { cur_state = attr->cur_qp_state; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 9b96661a7143..9a834a9cca0e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR_RO(hca_type); @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, 
.dereg_mr = ocrdma_dereg_mr, @@ -204,32 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = - OCRDMA_UVERBS(GET_CONTEXT) | - OCRDMA_UVERBS(QUERY_DEVICE) | - OCRDMA_UVERBS(QUERY_PORT) | - OCRDMA_UVERBS(ALLOC_PD) | - OCRDMA_UVERBS(DEALLOC_PD) | - OCRDMA_UVERBS(REG_MR) | - OCRDMA_UVERBS(DEREG_MR) | - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | - OCRDMA_UVERBS(CREATE_CQ) | - OCRDMA_UVERBS(RESIZE_CQ) | - OCRDMA_UVERBS(DESTROY_CQ) | - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(CREATE_QP) | - OCRDMA_UVERBS(MODIFY_QP) | - OCRDMA_UVERBS(QUERY_QP) | - OCRDMA_UVERBS(DESTROY_QP) | - OCRDMA_UVERBS(POLL_CQ) | - OCRDMA_UVERBS(POST_SEND) | - OCRDMA_UVERBS(POST_RECV); - - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; @@ -240,16 +215,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_SRQ) | - OCRDMA_UVERBS(MODIFY_SRQ) | - OCRDMA_UVERBS(QUERY_SRQ) | - OCRDMA_UVERBS(DESTROY_SRQ) | - OCRDMA_UVERBS(POST_SRQ_RECV); - + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); - } + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7350fe16f164..bc98bd950d99 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -974,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, 
const struct ib_cq_init_attr *attr, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) @@ -1299,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; @@ -1391,6 +1394,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ocrdma_dev *dev; enum ib_qp_state old_qps, new_qps; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); @@ -1770,6 +1776,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); + if (init_attr->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 967641662b24..8e7c069e1a2d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); @@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", - dev->pdev->device, - rdma_protocol_iwarp(&dev->ibdev, 1) ? 
- "iWARP" : "RoCE"); + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" : + "RoCE"); } static DEVICE_ATTR_RO(hca_type); @@ -188,10 +187,6 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); - - dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | - QEDR_UVERBS(CLOSE_XRCD) | - QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { @@ -249,31 +244,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | - QEDR_UVERBS(QUERY_DEVICE) | - QEDR_UVERBS(QUERY_PORT) | - QEDR_UVERBS(ALLOC_PD) | - QEDR_UVERBS(DEALLOC_PD) | - QEDR_UVERBS(CREATE_COMP_CHANNEL) | - QEDR_UVERBS(CREATE_CQ) | - QEDR_UVERBS(RESIZE_CQ) | - QEDR_UVERBS(DESTROY_CQ) | - QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_QP) | - QEDR_UVERBS(MODIFY_QP) | - QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP) | - QEDR_UVERBS(CREATE_SRQ) | - QEDR_UVERBS(DESTROY_SRQ) | - QEDR_UVERBS(QUERY_SRQ) | - QEDR_UVERBS(MODIFY_SRQ) | - QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(REG_MR) | - QEDR_UVERBS(DEREG_MR) | - QEDR_UVERBS(POLL_CQ) | - QEDR_UVERBS(POST_SEND) | - QEDR_UVERBS(POST_RECV); - if (IS_IWARP(dev)) { rc = qedr_iw_register_device(dev); if (rc) @@ -796,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); + break; default: break; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 019642ff24a7..2e85bb5104cb 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -928,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr 
*attr, "create_cq: called from %s. entries=%d, vector=%d\n", udata ? "User Lib" : "Kernel", entries, vector); + if (attr->flags) + return -EOPNOTSUPP; + if (entries > QEDR_MAX_CQES) { DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", @@ -1546,6 +1549,10 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, "create SRQ called from %s (pd %p)\n", (udata) ? "User lib" : "kernel", pd); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) return -EINVAL; @@ -2232,6 +2239,9 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp *ibqp; int rc = 0; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (attrs->qp_type == IB_QPT_XRC_TGT) { xrcd = get_qedr_xrcd(attrs->xrcd); dev = get_qedr_dev(xrcd->ibxrcd.device); @@ -2468,6 +2478,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, attr->qp_state); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + old_qp_state = qedr_get_ibqp_state(qp->state); if (attr_mask & IB_QP_STATE) new_qp_state = attr->qp_state; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 3dc6ce033319..2e07b3749b88 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { /* * If the 64 bit setup fails, try 32 bit. Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. 
*/ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); goto bail; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - qib_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); - goto bail; } pci_set_master(pdev); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 021df0654ba7..62c179fc764b 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -43,11 +43,8 @@ static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) { struct qib_devdata *dd = ppd->dd; - int ret; - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); } static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, @@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, static ssize_t show_status(struct qib_pportdata *ppd, char *buf) { - ssize_t ret; - if (!ppd->statusp) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(ppd->statusp)); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); } /* @@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, sl2vl_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); } static const struct sysfs_ops qib_sl2vl_ops = { @@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = 
container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; + u64 val; if (!strncmp(dattr->attr.name, "rc_acks", 7)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + val = READ_PER_CPU_CNTR(rc_acks); else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + val = READ_PER_CPU_CNTR(rc_qacks); else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) - return sprintf(buf, "%llu\n", - READ_PER_CPU_CNTR(rc_delayed_comp)); + val = READ_PER_CPU_CNTR(rc_delayed_comp); else - return sprintf(buf, "%u\n", - *(u32 *)((char *)qibp + dattr->counter)); + val = *(u32 *)((char *)qibp + dattr->counter); + + return sysfs_emit(buf, "%llu\n", val); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, @@ -565,7 +559,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -575,13 +569,10 @@ static ssize_t hca_type_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); @@ -590,7 +581,7 @@ static ssize_t version_show(struct device *device, struct device_attribute *attr, char *buf) { /* The string printed here is already newline-terminated. 
*/ - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); + return sysfs_emit(buf, "%s", (char *)ib_qib_version); } static DEVICE_ATTR_RO(version); @@ -602,7 +593,7 @@ static ssize_t boardversion_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -614,7 +605,7 @@ static ssize_t localbus_info_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); + return sysfs_emit(buf, "%s", dd->lbus_info); } static DEVICE_ATTR_RO(localbus_info); @@ -628,9 +619,10 @@ static ssize_t nctxts_show(struct device *device, /* Return the number of user ports (contexts) available. */ /* The calculation below deals with a special case where * cfgctxts is set to 1 on a single-port board. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : - (dd->cfgctxts - dd->first_user_ctxt)); + return sysfs_emit(buf, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? + 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static DEVICE_ATTR_RO(nctxts); @@ -642,21 +634,20 @@ static ssize_t nfreectxts_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); -static ssize_t serial_show(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *device, struct device_attribute *attr, + char *buf) { struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); - buf[sizeof(dd->serial)] = '\0'; - memcpy(buf, dd->serial, sizeof(dd->serial)); - strcat(buf, "\n"); - return strlen(buf); + return sysfs_emit(buf, ".%*s\n", size, dd->serial); } static DEVICE_ATTR_RO(serial); @@ -689,27 +680,26 @@ static ssize_t tempsense_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; - int idx; + int i; u8 regvals[8]; - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) + for (i = 0; i < 8; i++) { + int ret; + + if (i == 6) continue; - ret = dd->f_tempsense_rd(dd, idx); + ret = dd->f_tempsense_rd(dd, i); if (ret < 0) - break; - regvals[idx] = ret; + return ret; /* return error on bad read */ + regvals[i] = ret; } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + (signed char)regvals[0], + (signed char)regvals[1], + regvals[2], + regvals[3], + (signed char)regvals[5], + (signed char)regvals[7]); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index aa2e65fc5cd6..1b63a491fa72 100644 --- 
a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -398,25 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index c85d48ae7442..e59615a4c9d9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; mutex_unlock(&us_ibdev->usdev_lock); - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%u\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - char *ptr; - unsigned left; - unsigned n; enum usnic_vnic_res_type 
res_type; - - /* Buffer space limit is 1 page */ - ptr = buf; - left = PAGE_SIZE; + int len; mutex_lock(&us_ibdev->usdev_lock); if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; - + char *sep = ""; /* * bus name seems to come with annoying prefix. * Remove it if it is predictable @@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) if (strncmp(busname, "PCI Bus ", 8) == 0) busname += 8; - n = scnprintf(ptr, left, - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", - dev_name(&us_ibdev->ib_dev.dev), - busname, - PCI_SLOT(us_ibdev->pdev->devfn), - PCI_FUNC(us_ibdev->pdev->devfn), - netdev_name(us_ibdev->netdev), - us_ibdev->ufdev->mac, - kref_read(&us_ibdev->vf_cnt)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", + dev_name(&us_ibdev->ib_dev.dev), + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + kref_read(&us_ibdev->vf_cnt)); + len += sysfs_emit_at(buf, len, " Per VF:"); for (res_type = USNIC_VNIC_RES_TYPE_EOL; - res_type < USNIC_VNIC_RES_TYPE_MAX; - res_type++) { + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { if (us_ibdev->vf_res_cnt[res_type] == 0) continue; - n = scnprintf(ptr, left, " %d %s%s", - us_ibdev->vf_res_cnt[res_type], - usnic_vnic_res_type_to_str(res_type), - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
- "," : ""); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "%s %d %s", + sep, + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type)); + sep = ","; } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); } else { - n = scnprintf(ptr, left, "%s: no VFs\n", - dev_name(&us_ibdev->ib_dev.dev)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: no VFs\n", + dev_name(&us_ibdev->ib_dev.dev)); } + mutex_unlock(&us_ibdev->usdev_lock); - return ptr - buf; + return len; } static DEVICE_ATTR_RO(config); @@ -132,8 +122,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", - netdev_name(us_ibdev->netdev)); + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); } static DEVICE_ATTR_RO(iface); @@ -143,8 +132,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - kref_read(&us_ibdev->vf_cnt)); + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); } static DEVICE_ATTR_RO(max_vf); @@ -158,8 +146,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); - return scnprintf(buf, PAGE_SIZE, - "%d\n", qp_per_vf); + return sysfs_emit(buf, "%d\n", qp_per_vf); } static DEVICE_ATTR_RO(qp_per_vf); @@ -169,8 +156,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); + return sysfs_emit(buf, "%d\n", 
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } static DEVICE_ATTR_RO(cq_per_vf); @@ -217,43 +204,36 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); } static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - int i, j, n; - int left; - char *ptr; + int i, j; struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *vnic_res; + int len; - left = PAGE_SIZE; - ptr = buf; - - n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", - qp_grp->ibqp.qp_num, - usnic_ib_qp_grp_state_to_string(qp_grp->state), - qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic)); for (i = 0; qp_grp->res_chunk_list[i]; i++) { res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - n = scnprintf(ptr, left, "%s[%d] ", + len += sysfs_emit_at( + buf, len, "%s[%d] ", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); - UPDATE_PTR_LEFT(n, ptr, left); } } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); - return ptr - buf; + return len; } static QPN_ATTR_RO(context); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9e961f8ffa10..38a37770c016 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -474,7 +474,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, us_ibdev = to_usdev(pd->device); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); err = 
ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { @@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int status; usnic_dbg("\n"); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp_grp = to_uqp_grp(ibqp); mutex_lock(&qp_grp->vf->pf->usdev_lock); @@ -581,7 +584,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 319546a39a0d..a119ac3e103c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + if (attr->flags) + return -EOPNOTSUPP; + entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6895bac53990..00a330909bb3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(hca_type); static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_REV_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); } static DEVICE_ATTR_RO(hw_rev); static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", 
PVRDMA_BOARD_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); } static DEVICE_ATTR_RO(board_id); @@ -205,27 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; @@ -249,13 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 428256c55065..1d3bdd7bb51d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ 
b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, dev_warn(&dev->pdev->dev, "invalid create queuepair flags %#x\n", init_attr->create_flags); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (init_attr->qp_type != IB_QPT_RC && @@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, next_state; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* Sanity checking. Should need lock here */ mutex_lock(&qp->mutex); cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 082208f9aa90..bdc2703532c6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return -EINVAL; + return -EOPNOTSUPP; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952..0df48b3a6b56 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. 
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index b938c4ffa99a..f9754dcd250b 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 19248be14093..20cc0799ac4b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -211,7 +211,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int err; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > rdi->dparms.props.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e..90fc234f489a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). 
*/ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee48befc8978..e9db6bf10618 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1083,10 +1083,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) + return ERR_PTR(-EOPNOTSUPP); + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - (init_attr->create_flags && - init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ @@ -1469,6 +1470,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pmtu = 0; /* for gcc warning only */ int opa_ah; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 670a9623b46e..49cec85a372a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -524,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -579,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - 
dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; @@ -596,36 +589,11 @@ int rvt_register_device(struct rvt_dev_info *rdi) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + rdi->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; if (!rdi->ibdev.num_comp_vectors) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa68049..452149066792 100644 --- 
a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d2ce852447c1..6e8c41567ba0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -31,7 +31,6 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) return 0; case RXE_MEM_TYPE_MR: - case RXE_MEM_TYPE_FMR: if (iova < mem->iova || length > mem->length || iova > mem->iova + mem->length - length) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 34bef7d8e6b4..c4b06ced30a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -8,7 +8,6 @@ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/if.h> -#include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/sch_generic.h> #include <linux/netfilter.h> @@ -20,18 +19,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; @@ -166,14 +153,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } if (!rxe) goto drop; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c 
index af3923bf0a36..d4917646641a 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -634,7 +634,8 @@ next_wqe: } if (unlikely(qp_type(qp) == IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38..5a098083a9d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -872,11 +872,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; - if (is_vlan_dev(skb->dev)) { - wc->wc_flags |= IB_WC_WITH_VLAN; - wc->vlan_id = vlan_dev_vlan_id(skb->dev); - } - if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f9c832e82552..2fbea2b2d72a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -265,6 +265,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; @@ -392,6 +395,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, uresp = udata->outbuf; } + if (init->create_flags) + return ERR_PTR(-EOPNOTSUPP); + err = rxe_qp_chk_init(rxe, init); if (err) goto err1; @@ -433,6 +439,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + if (mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; @@ -765,7 +774,7 @@ static int 
rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) @@ -1033,7 +1042,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); @@ -1070,6 +1079,7 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, @@ -1118,56 +1128,18 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) - ; + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 3414b341b709..79e0a5a878da 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -273,7 +273,6 @@ enum rxe_mem_type { RXE_MEM_TYPE_NONE, RXE_MEM_TYPE_DMA, RXE_MEM_TYPE_MR, - RXE_MEM_TYPE_FMR, RXE_MEM_TYPE_MW, }; 
@@ -352,7 +351,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df..1b5105cbabae 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f..adda78996219 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 66764f7ef072..1f9e15b71504 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1047,7 +1047,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->state); } } - if (rv && rv != EAGAIN) + if (rv && rv != -EAGAIN) release_cep = 1; break; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 181e06c1c43d..ee95cf29179d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,25 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a 
top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -347,30 +330,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, @@ -382,13 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. 
*/ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -430,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 7cf3242ffb41..68fd053fc774 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -307,6 +307,9 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, siw_dbg(base_dev, "create new QP\n"); + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; @@ -544,6 +547,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, if (!attr_mask) return 0; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { @@ -1094,6 +1100,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; @@ -1555,6 +1564,9 @@ int siw_create_srq(struct ib_srq *base_srq, base_ucontext); int rv; + if (init_attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { 
siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8f0b598a46ec..d5d592bdab35 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1514,9 +1514,9 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - return sprintf(buf, "connected\n"); + return sysfs_emit(buf, "connected\n"); else - return sprintf(buf, "datagram\n"); + return sysfs_emit(buf, "datagram\n"); } static ssize_t set_mode(struct device *d, struct device_attribute *attr, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 67a21fdf5367..823f6831e7ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -166,6 +166,10 @@ static inline int ib_speed_enum_to_int(int speed) return SPEED_14000; case IB_SPEED_EDR: return SPEED_25000; + case IB_SPEED_HDR: + return SPEED_50000; + case IB_SPEED_NDR: + return SPEED_100000; } return SPEED_UNKNOWN; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abfab89423f4..a6f413491321 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2266,7 +2266,7 @@ static ssize_t show_pkey(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "0x%04x\n", priv->pkey); + return sysfs_emit(buf, "0x%04x\n", priv->pkey); } static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); @@ -2276,7 +2276,8 @@ static ssize_t show_umcast(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); + return 
sysfs_emit(buf, "%d\n", + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) @@ -2446,7 +2447,7 @@ static ssize_t dev_id_show(struct device *dev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); - return sprintf(buf, "%#x\n", ndev->dev_id); + return sysfs_emit(buf, "%#x\n", ndev->dev_id); } static DEVICE_ATTR_RO(dev_id); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 587252fd6f57..5a150a080ac2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -158,6 +158,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size, req_vec; int i; + static atomic_t counter; size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); @@ -171,8 +172,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -EOPNOTSUPP) return ret; - req_vec = (priv->port - 1) * 2; - + req_vec = atomic_inc_return(&counter) * 2; cq_attr.cqe = size; cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4c50a87ed7cc..5958840dbeed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -46,7 +46,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = ipoib_priv(dev); - return sprintf(buf, "%s\n", priv->parent->name); + return sysfs_emit(buf, "%s\n", priv->parent->name); } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 
436e17f1d0e5..e47cd0291a7e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -28,6 +28,18 @@ static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); +static int isert_sg_tablesize_set(const char *val, + const struct kernel_param *kp); +static const struct kernel_param_ops sg_tablesize_ops = { + .set = isert_sg_tablesize_set, + .get = param_get_int, +}; + +static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; +module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); +MODULE_PARM_DESC(sg_tablesize, + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); + static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_comp_wq; @@ -47,6 +59,19 @@ static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc); +static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE || + n > ISCSI_ISER_MAX_SG_TABLESIZE) + return -EINVAL; + + return param_set_int(val, kp); +} + + static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -101,7 +126,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, - ISCSI_ISER_MAX_SG_TABLESIZE); + isert_sg_tablesize); attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; @@ -1076,7 +1101,7 @@ isert_handle_scsi_cmd(struct 
isert_conn *isert_conn, sequence_cmd: rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); - if (!rc && dump_payload == false && unsol_data) + if (!rc && !dump_payload && unsol_data) iscsit_set_unsolicited_dataout(cmd); else if (dump_payload && imm_data) target_put_sess_cmd(&cmd->se_cmd); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 7fee4a65e181..6c5af13db4e0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -65,6 +65,12 @@ */ #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) +/* Default I/O size is 1MB */ +#define ISCSI_ISER_DEF_SG_TABLESIZE 256 + +/* Minimum I/O size is 512KB */ +#define ISCSI_ISER_MIN_SG_TABLESIZE 128 + /* Maximum support is 16MB I/O size */ #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ac4c49cbf153..ba00f0de14ca 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -52,7 +52,8 @@ static ssize_t max_reconnect_attempts_show(struct device *dev, { struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); - return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); + return sysfs_emit(page, "%d\n", + rtrs_clt_get_max_reconnect_attempts(clt)); } static ssize_t max_reconnect_attempts_store(struct device *dev, @@ -95,11 +96,13 @@ static ssize_t mpath_policy_show(struct device *dev, switch (clt->mp_policy) { case MP_POLICY_RR: - return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); + return sysfs_emit(page, "round-robin (RR: %d)\n", + clt->mp_policy); case MP_POLICY_MIN_INFLIGHT: - return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); + return sysfs_emit(page, "min-inflight (MI: %d)\n", + clt->mp_policy); default: - return sprintf(page, "Unknown (%d)\n", clt->mp_policy); + return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy); } } @@ 
-138,9 +141,10 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", - attr->attr.name); + return sysfs_emit( + page, + "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", + attr->attr.name); } static ssize_t add_path_store(struct device *dev, @@ -184,20 +188,18 @@ static ssize_t rtrs_clt_state_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (sess->state == RTRS_CLT_CONNECTED) - return sprintf(page, "connected\n"); + return sysfs_emit(page, "connected\n"); - return sprintf(page, "disconnected\n"); + return sysfs_emit(page, "disconnected\n"); } static struct kobj_attribute rtrs_clt_state_attr = __ATTR(state, 0444, rtrs_clt_state_show, NULL); static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, @@ -225,11 +227,9 @@ static struct kobj_attribute rtrs_clt_reconnect_attr = rtrs_clt_reconnect_store); static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, @@ -257,11 +257,9 @@ static struct kobj_attribute rtrs_clt_disconnect_attr = rtrs_clt_disconnect_store); static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + 
struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, @@ -324,7 +322,7 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); - return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); + return sysfs_emit(page, "%u\n", sess->hca_port); } static struct kobj_attribute rtrs_clt_hca_port_attr = @@ -338,7 +336,7 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); + return sysfs_emit(page, "%s\n", sess->hca_name); } static struct kobj_attribute rtrs_clt_hca_name_attr = @@ -349,12 +347,13 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_src_addr_attr = @@ -365,12 +364,13 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c 
b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index f298adc02acb..560865f65dc4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1236,8 +1236,7 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess) if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); } kfree(sess->reqs); sess->reqs = NULL; @@ -1499,6 +1498,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) con->c.cid = cid; con->c.sess = &sess->s; atomic_set(&con->io_cnt, 0); + mutex_init(&con->con_mutex); sess->s.con[cid] = &con->c; @@ -1510,6 +1510,7 @@ static void destroy_con(struct rtrs_clt_con *con) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); sess->s.con[con->c.cid] = NULL; + mutex_destroy(&con->con_mutex); kfree(con); } @@ -1520,15 +1521,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; - /* - * This function can fail, but still destroy_con_cq_qp() should - * be called, this is because create_con_cq_qp() is called on cm - * event path, thus caller/waiter never knows: have we failed before - * create_con_cq_qp() or after. To solve this dilemma without - * creating any additional flags just allow destroy_con_cq_qp() be - * called many times. - */ - + lockdep_assert_held(&con->con_mutex); if (con->c.cid == 0) { /* * One completion for each receive and two for each send @@ -1602,11 +1595,10 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) * Be careful here: destroy_con_cq_qp() can be called even * create_con_cq_qp() failed, see comments there. 
*/ - + lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, - sess->s.dev->ib_dev, con->queue_size); + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); con->rsp_ius = NULL; con->queue_size = 0; } @@ -1634,16 +1626,16 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) struct rtrs_sess *s = con->c.sess; int err; + mutex_lock(&con->con_mutex); err = create_con_cq_qp(con); + mutex_unlock(&con->con_mutex); if (err) { rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); return err; } err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); - if (err) { + if (err) rtrs_err(s, "Resolving route failed, err: %d\n", err); - destroy_con_cq_qp(con); - } return err; } @@ -1837,8 +1829,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, cm_err = rtrs_rdma_route_resolved(con); break; case RDMA_CM_EVENT_ESTABLISHED: - con->cm_err = rtrs_rdma_conn_established(con, ev); - if (likely(!con->cm_err)) { + cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!cm_err)) { /* * Report success and wake up. Here we abuse state_wq, * i.e. wake up without state change, but we set cm_err. 
@@ -1851,20 +1843,22 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_REJECTED: cm_err = rtrs_rdma_conn_rejected(con, ev); break; + case RDMA_CM_EVENT_DISCONNECTED: + /* No message for disconnecting */ + cm_err = -ECONNRESET; + break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -ECONNRESET; break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: + rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -EHOSTUNREACH; break; - case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_ADDR_CHANGE: - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - cm_err = -ECONNRESET; - break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* * Device removal is a special case. Queue close and return 0. @@ -1949,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con) errr: stop_cm(con); - /* Is safe to call destroy if cq_qp is not inited */ + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm: destroy_cm(con); @@ -2057,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) if (!sess->s.con[cid]) break; con = to_clt_con(sess->s.con[cid]); + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2164,8 +2161,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, - struct rtrs_addr *addr) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) { struct rtrs_clt *clt = sess->clt; @@ -2224,7 +2220,10 @@ destroy: struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); stop_cm(con); + + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2245,7 +2244,7 @@ static void 
rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", @@ -2264,8 +2263,12 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (unlikely(!sg_cnt)) + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + sg_cnt); return -EINVAL; + } + /* * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. @@ -2278,11 +2281,6 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); return -EINVAL; } - if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", - sg_cnt); - return -EINVAL; - } total_len = 0; for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; @@ -2374,7 +2372,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); rtrs_clt_change_state(sess, state); } @@ -2436,9 +2434,9 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); @@ -2938,7 +2936,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt 
*clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. */ - rtrs_clt_add_path_to_arr(sess, addr); + rtrs_clt_add_path_to_arr(sess); err = init_sess(sess); if (err) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 167acd3c90fc..b8dbd701b3cb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -72,6 +72,7 @@ struct rtrs_clt_con { struct rtrs_iu *rsp_ius; u32 queue_size; unsigned int cpu; + struct mutex con_mutex; atomic_t io_cnt; int cm_err; }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index b8e43dc4d95a..3f2918671dbe 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -287,8 +287,7 @@ struct rtrs_msg_rdma_hdr { struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, struct ib_device *dev, enum dma_data_direction, void (*done)(struct ib_cq *cq, struct ib_wc *wc)); -void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, - struct ib_device *dev, u32 queue_size); +void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 07fbb063555d..d2edff3b8f0d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -27,11 +27,9 @@ static struct kobj_type ktype = { }; static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", 
attr->attr.name); } static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, @@ -72,8 +70,7 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); usr_con = sess->s.con[0]; - return scnprintf(page, PAGE_SIZE, "%u\n", - usr_con->cm_id->port_num); + return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); } static struct kobj_attribute rtrs_srv_hca_port_attr = @@ -87,8 +84,7 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", - sess->s.dev->ib_dev->name); + return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); } static struct kobj_attribute rtrs_srv_hca_name_attr = @@ -115,12 +111,13 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_srv_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_srv_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_srv_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index d6f93601712e..c42fd470c4eb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -113,28 +113,18 @@ static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, return changed; } -static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state, - enum rtrs_srv_state *old_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { bool changed; spin_lock_irq(&sess->state_lock); - *old_state = sess->state; changed = __rtrs_srv_change_state(sess, new_state); 
spin_unlock_irq(&sess->state_lock); return changed; } -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - enum rtrs_srv_state old_state; - - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); -} - static void free_id(struct rtrs_srv_op *id) { if (!id) @@ -471,10 +461,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, void close_sess(struct rtrs_srv_sess *sess) { - enum rtrs_srv_state old_state; - - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, - &old_state)) + if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) queue_work(rtrs_wq, &sess->close_work); WARN_ON(sess->state != RTRS_SRV_CLOSING); } @@ -577,8 +564,7 @@ static void unmap_cont_bufs(struct rtrs_srv_sess *sess) struct rtrs_srv_mr *srv_mr; srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); @@ -682,8 +668,7 @@ err: sgt = &srv_mr->sgt; mr = srv_mr->mr; free_iu: - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); unmap_sg: @@ -735,7 +720,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(s, "Sess info response send failed: %s\n", @@ -861,7 +846,7 @@ static int process_info_req(struct rtrs_srv_con *con, if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -906,7 +891,7 @@ static void 
rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) goto close; out: - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); return; close: close_sess(sess); @@ -929,7 +914,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con) err = rtrs_iu_post_recv(&con->c, rx_iu); if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); return err; } @@ -1328,17 +1313,42 @@ static void rtrs_srv_dev_release(struct device *dev) kfree(srv); } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) { struct rtrs_srv *srv; int i; + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) + if (!srv) { + mutex_unlock(&ctx->srv_mutex); return NULL; + } - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); @@ -1347,6 +1357,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); srv->chunks = 
kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1358,7 +1370,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, if (!srv->chunks[i]) goto err_free_chunks; } - list_add(&srv->ctx_list, &ctx->srv_list); + refcount_set(&srv->refcount, 1); return srv; @@ -1369,52 +1381,9 @@ err_free_chunks: err_free_srv: kfree(srv); - - return NULL; -} - -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ - put_device(&srv->dev); -} - -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - return NULL; } -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - static void put_srv(struct rtrs_srv *srv) { if (refcount_dec_and_test(&srv->refcount)) { @@ -1813,7 +1782,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { + /* + * "refcount == 0" happens if a previous thread calls get_or_create_srv + * allocate srv, but chunks of srv are not allocated yet. 
+ */ + if (!srv || refcount_read(&srv->refcount) == 0) { err = -ENOMEM; goto reject_w_err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 08b0b8a6eebe..9543ae19996c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -62,7 +62,7 @@ struct rtrs_srv_op { /* * server side memory region context, when always_invalidate=Y, we need - * queue_depth of memory regrion to invalidate each memory region. + * queue_depth of memory region to invalidate each memory region. */ struct rtrs_srv_mr { struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ff1093d6e4bc..2e3a849e0a77 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -31,6 +31,7 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, return NULL; for (i = 0; i < queue_size; i++) { iu = &ius[i]; + iu->direction = dir; iu->buf = kzalloc(size, gfp_mask); if (!iu->buf) goto err; @@ -41,17 +42,15 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, iu->cqe.done = done; iu->size = size; - iu->direction = dir; } return ius; err: - rtrs_iu_free(ius, dir, dma_dev, i); + rtrs_iu_free(ius, dma_dev, i); return NULL; } EXPORT_SYMBOL_GPL(rtrs_iu_alloc); -void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, - struct ib_device *ibdev, u32 queue_size) +void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) { struct rtrs_iu *iu; int i; @@ -61,7 +60,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, for (i = 0; i < queue_size; i++) { iu = &ius[i]; - ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); kfree(iu->buf); } kfree(ius); @@ -105,6 +104,22 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) } EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); +static 
int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, + struct ib_send_wr *wr) +{ + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = wr; + } else { + head = wr; + } + + return ib_post_send(qp, head, NULL); +} + int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head) { @@ -127,17 +142,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, .send_flags = IB_SEND_SIGNALED, }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_send); @@ -169,17 +174,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, if (WARN_ON(sge[i].length == 0)) return -EINVAL; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr.wr; - } else { - head = &wr.wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); @@ -196,17 +191,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, .ex.imm_data = cpu_to_be32(imm_data), }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d8fcd21ab472..5492b66a8153 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -169,9 +169,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer,
"%d\n", tmo); + return sysfs_emit(buffer, "%d\n", tmo); else - return sprintf(buffer, "off\n"); + return sysfs_emit(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) @@ -2896,7 +2896,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2904,7 +2904,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2914,8 +2914,8 @@ static ssize_t show_service_id(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%016llx\n", - be64_to_cpu(target->ib_cm.service_id)); + return sysfs_emit(buf, "0x%016llx\n", + be64_to_cpu(target->ib_cm.service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2925,7 +2925,8 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); + + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, @@ -2933,7 +2934,7 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%pI6\n", target->sgid.raw); + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); } static ssize_t show_dgid(struct device 
*dev, struct device_attribute *attr, @@ -2944,7 +2945,8 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); + + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -2954,7 +2956,8 @@ static ssize_t show_orig_dgid(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); + + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } static ssize_t show_req_lim(struct device *dev, @@ -2968,7 +2971,8 @@ static ssize_t show_req_lim(struct device *dev, ch = &target->ch[i]; req_lim = min(req_lim, ch->req_lim); } - return sprintf(buf, "%d\n", req_lim); + + return sysfs_emit(buf, "%d\n", req_lim); } static ssize_t show_zero_req_lim(struct device *dev, @@ -2976,7 +2980,7 @@ static ssize_t show_zero_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->zero_req_lim); + return sysfs_emit(buf, "%d\n", target->zero_req_lim); } static ssize_t show_local_ib_port(struct device *dev, @@ -2984,7 +2988,7 @@ static ssize_t show_local_ib_port(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->srp_host->port); + return sysfs_emit(buf, "%d\n", target->srp_host->port); } static ssize_t show_local_ib_device(struct device *dev, @@ -2992,8 +2996,8 @@ static ssize_t show_local_ib_device(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", - dev_name(&target->srp_host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", + dev_name(&target->srp_host->srp_dev->dev->dev)); } static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, @@ -3001,7 +3005,7 @@ static ssize_t 
show_ch_count(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->ch_count); + return sysfs_emit(buf, "%d\n", target->ch_count); } static ssize_t show_comp_vector(struct device *dev, @@ -3009,7 +3013,7 @@ static ssize_t show_comp_vector(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->comp_vector); + return sysfs_emit(buf, "%d\n", target->comp_vector); } static ssize_t show_tl_retry_count(struct device *dev, @@ -3017,7 +3021,7 @@ static ssize_t show_tl_retry_count(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->tl_retry_count); + return sysfs_emit(buf, "%d\n", target->tl_retry_count); } static ssize_t show_cmd_sg_entries(struct device *dev, @@ -3025,7 +3029,7 @@ static ssize_t show_cmd_sg_entries(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%u\n", target->cmd_sg_cnt); + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); } static ssize_t show_allow_ext_sg(struct device *dev, @@ -3033,7 +3037,7 @@ static ssize_t show_allow_ext_sg(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); } static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); @@ -3893,7 +3897,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -3903,7 +3907,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%d\n", host->port); + return sysfs_emit(buf, "%d\n", host->port); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 53a8becac827..9cbbce1bccd6 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3448,7 +3448,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); } static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, @@ -3485,7 +3485,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); } static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, @@ -3522,7 +3522,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port 
*sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); } static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, @@ -3559,7 +3559,7 @@ static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%d\n", sport->port_attrib.use_srq); + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); } static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, @@ -3649,7 +3649,7 @@ out: static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", rdma_cm_port); + return sysfs_emit(page, "%d\n", rdma_cm_port); } static ssize_t srpt_rdma_cm_port_store(struct config_item *item, @@ -3705,7 +3705,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); + return sysfs_emit(page, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3812,7 +3812,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "\n"); + return sysfs_emit(buf, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); |