From 5fd251c8b4c52da0d0916470a67fbb77b972125e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:48 +0300 Subject: IB/core: Introduce Work Queue object and its verbs Introduce Work Queue object and its create/destroy/modify verbs. QP can be created without internal WQs "packaged" inside it, this QP can be configured to use "external" WQ object as its receive/send queue. WQ is a necessary component for RSS technology since RSS mechanism is supposed to distribute the traffic between multiple Receive Work Queues. WQ associated (many to one) with Completion Queue and it owns WQ properties (PD, WQ size, etc.). WQ has a type, this patch introduces the IB_WQT_RQ (i.e.receive queue), it may be extend to others such as IB_WQT_SQ. (send queue). WQ from type IB_WQT_RQ contains receive work requests. PD is an attribute of a work queue (i.e. send/receive queue), it's used by the hardware for security validation before scattering to a memory region which is pointed by the WQ. For that, an external WQ object needs a PD, letting the hardware makes that validation. When accessing a memory region that is pointed by the WQ its PD is used and not the QP's PD, this behavior is similar to a SRQ and a QP. WQ context is subject to a well-defined state transitions done by the modify_wq verb. When WQ is created its initial state becomes IB_WQS_RESET. >From IB_WQS_RESET it can be modified to itself or to IB_WQS_RDY. >From IB_WQS_RDY it can be modified to itself, to IB_WQS_RESET or to IB_WQS_ERR. >From IB_WQS_ERR it can be modified to IB_WQS_RESET. Note: transition to IB_WQS_ERR might occur implicitly in case there was some HW error. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 82 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1d7d4cf442e3..c096cadc6e23 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1554,6 +1554,88 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) } EXPORT_SYMBOL(ib_dealloc_xrcd); +/** + * ib_create_wq - Creates a WQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the WQ. + * @wq_init_attr: A list of initial attributes required to create the + * WQ. If WQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created WQ. + * + * wq_init_attr->max_wr and wq_init_attr->max_sge determine + * the requested size of the WQ, and set to the actual values allocated + * on return. + * If ib_create_wq() succeeds, then max_wr and max_sge will always be + * at least as large as the requested values. + */ +struct ib_wq *ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *wq_attr) +{ + struct ib_wq *wq; + + if (!pd->device->create_wq) + return ERR_PTR(-ENOSYS); + + wq = pd->device->create_wq(pd, wq_attr, NULL); + if (!IS_ERR(wq)) { + wq->event_handler = wq_attr->event_handler; + wq->wq_context = wq_attr->wq_context; + wq->wq_type = wq_attr->wq_type; + wq->cq = wq_attr->cq; + wq->device = pd->device; + wq->pd = pd; + wq->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_inc(&wq_attr->cq->usecnt); + atomic_set(&wq->usecnt, 0); + } + return wq; +} +EXPORT_SYMBOL(ib_create_wq); + +/** + * ib_destroy_wq - Destroys the specified WQ. + * @wq: The WQ to destroy. + */ +int ib_destroy_wq(struct ib_wq *wq) +{ + int err; + struct ib_cq *cq = wq->cq; + struct ib_pd *pd = wq->pd; + + if (atomic_read(&wq->usecnt)) + return -EBUSY; + + err = wq->device->destroy_wq(wq); + if (!err) { + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + } + return err; +} +EXPORT_SYMBOL(ib_destroy_wq); + +/** + * ib_modify_wq - Modifies the specified WQ. + * @wq: The WQ to modify. + * @wq_attr: On input, specifies the WQ attributes to modify. + * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ + * are being modified. + * On output, the current values of selected WQ attributes are returned. + */ +int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask) +{ + int err; + + if (!wq->device->modify_wq) + return -ENOSYS; + + err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + return err; +} +EXPORT_SYMBOL(ib_modify_wq); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) -- cgit v1.2.3 From f213c05272100f385912372fff678d0af4d7f8ad Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:49 +0300 Subject: IB/uverbs: Add WQ support User space applications which use RSS functionality need to create a work queue object (WQ). The lifetime of such an object is: * Create a WQ * Modify the WQ from reset to init state. * Use the WQ (by downstream patches). * Destroy the WQ. These commands are added to the uverbs API. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 9 ++ drivers/infiniband/core/uverbs_cmd.c | 243 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 25 ++++ include/rdma/ib_verbs.h | 3 + include/uapi/rdma/ib_user_verbs.h | 41 ++++++ 5 files changed, 321 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 612ccfd39bf9..74776c6531f4 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -162,6 +162,10 @@ struct ib_uqp_object { struct ib_uxrcd_object *uxrcd; }; +struct ib_uwq_object { + struct ib_uevent_object uevent; +}; + struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; @@ -181,6 +185,7 @@ extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; +extern struct idr ib_uverbs_wq_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -199,6 +204,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); @@ -275,5 +281,8 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); +IB_UVERBS_DECLARE_EX_CMD(create_wq); +IB_UVERBS_DECLARE_EX_CMD(modify_wq); +IB_UVERBS_DECLARE_EX_CMD(destroy_wq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1a8babb8ee3c..22e617391657 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -57,6 +57,7 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -243,6 +244,16 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); +} + +static void put_wq_read(struct ib_wq *wq) +{ + put_uobj_read(wq->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -326,6 +337,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->wq_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -3056,6 +3068,237 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, return 0; } +int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq_resp resp = {}; + struct ib_uwq_object *obj; + int err = 0; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_wq *wq; + struct ib_wq_init_attr wq_init_attr = {}; + size_t required_cmd_sz; + size_t required_resp_len; + + required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); + required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, + &wq_lock_class); + down_write(&obj->uevent.uobject.mutex); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + err = -EINVAL; + goto err_uobj; + } + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + err = -EINVAL; + goto err_put_pd; + } + + wq_init_attr.cq = cq; + wq_init_attr.max_sge = cmd.max_sge; + wq_init_attr.max_wr = cmd.max_wr; + wq_init_attr.wq_context = file; + wq_init_attr.wq_type = cmd.wq_type; + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + if (IS_ERR(wq)) { + err = PTR_ERR(wq); + goto err_put_cq; + } + + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + if (err) + goto destroy_wq; + + memset(&resp, 0, sizeof(resp)); + resp.wq_handle = obj->uevent.uobject.id; + resp.max_sge = wq_init_attr.max_sge; + resp.max_wr = wq_init_attr.max_wr; + resp.wqn = wq->wq_num; + resp.response_length = required_resp_len; + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + put_pd_read(pd); + put_cq_read(cq); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); +destroy_wq: + ib_destroy_wq(wq); +err_put_cq: + put_cq_read(cq); +err_put_pd: + put_pd_read(pd); +err_uobj: + put_uobj_write(&obj->uevent.uobject); + + return err; +} + +int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq_resp resp = {}; + struct ib_wq *wq; + struct ib_uobject *uobj; + struct ib_uwq_object *obj; + size_t required_cmd_sz; + size_t required_resp_len; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); + required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + resp.response_length = required_resp_len; + uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + + wq = uobj->object; + obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); + ret = ib_destroy_wq(wq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + resp.events_reported = obj->uevent.events_reported; + put_uobj(uobj); + + ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (ret) + return ret; + + return 0; +} + +int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_wq *wq; + struct ib_wq_attr wq_attr = {}; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask) + return -EINVAL; + + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + return -EINVAL; + + wq = idr_read_wq(cmd.wq_handle, file->ucontext); + if (!wq) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + wq_attr.wq_state = cmd.wq_state; + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); + put_wq_read(wq); + return ret; +} + int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 31f422a70623..91cb36f66ea7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,7 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_wq_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -130,6 +131,9 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, + [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, + [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, + [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -265,6 +269,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { + struct ib_wq *wq = uobj->object; + struct ib_uwq_object *uwq = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + ib_destroy_wq(wq); + ib_uverbs_release_uevent(file, &uwq->uevent); + kfree(uwq); + } + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = @@ -568,6 +583,16 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f2d954ac42f6..0c1956a98573 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -562,6 +562,7 @@ enum ib_event_type { IB_EVENT_QP_LAST_WQE_REACHED, IB_EVENT_CLIENT_REREGISTER, IB_EVENT_GID_CHANGE, + IB_EVENT_WQ_FATAL, }; const char *__attribute_const__ ib_event_msg(enum ib_event_type event); @@ -572,6 +573,7 @@ struct ib_event { struct ib_cq *cq; struct ib_qp *qp; struct ib_srq *srq; + struct ib_wq *wq; u8 port_num; } element; enum ib_event_type event; @@ -1323,6 +1325,7 @@ struct ib_ucontext { struct list_head ah_list; struct list_head xrcd_list; struct list_head rule_list; + struct list_head wq_list; int closing; struct pid *tgid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index b6543d73d20a..c9470e511e03 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -95,6 +95,9 @@ enum { IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, + IB_USER_VERBS_EX_CMD_CREATE_WQ, + IB_USER_VERBS_EX_CMD_MODIFY_WQ, + IB_USER_VERBS_EX_CMD_DESTROY_WQ, }; /* @@ -946,4 +949,42 @@ struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; +struct ib_uverbs_ex_create_wq { + __u32 comp_mask; + __u32 wq_type; + __u64 user_handle; + __u32 pd_handle; + __u32 cq_handle; + __u32 max_wr; + __u32 max_sge; +}; + +struct ib_uverbs_ex_create_wq_resp { + __u32 comp_mask; + __u32 response_length; + __u32 wq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 wqn; +}; + +struct ib_uverbs_ex_destroy_wq { + __u32 comp_mask; + __u32 wq_handle; +}; + +struct ib_uverbs_ex_destroy_wq_resp { + __u32 comp_mask; + __u32 response_length; + __u32 events_reported; + __u32 reserved; +}; + +struct ib_uverbs_ex_modify_wq { + __u32 attr_mask; + __u32 wq_handle; + __u32 wq_state; + __u32 curr_wq_state; +}; + #endif /* IB_USER_VERBS_H */ -- cgit v1.2.3 From 6d39786bf116e476d75eca91f7cfa22586a32e5f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:51 +0300 Subject: IB/core: Introduce Receive Work Queue indirection table Introduce Receive Work Queue (WQ) indirection table. This object can be used to spread incoming traffic to different receive Work Queues. A Receive WQ indirection table points to variable size of WQs. This table is given to a QP in downstream patches. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 62 +++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 23 +++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c096cadc6e23..6b548d7f8753 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1636,6 +1636,68 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } EXPORT_SYMBOL(ib_modify_wq); +/* + * ib_create_rwq_ind_table - Creates a RQ Indirection Table. + * @device: The device on which to create the rwq indirection table. + * @ib_rwq_ind_table_init_attr: A list of initial attributes required to + * create the Indirection Table. + * + * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less + * than the created ib_rwq_ind_table object and the caller is responsible + * for its memory allocation/free. + */ +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr) +{ + struct ib_rwq_ind_table *rwq_ind_table; + int i; + u32 table_size; + + if (!device->create_rwq_ind_table) + return ERR_PTR(-ENOSYS); + + table_size = (1 << init_attr->log_ind_tbl_size); + rwq_ind_table = device->create_rwq_ind_table(device, + init_attr, NULL); + if (IS_ERR(rwq_ind_table)) + return rwq_ind_table; + + rwq_ind_table->ind_tbl = init_attr->ind_tbl; + rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; + rwq_ind_table->device = device; + rwq_ind_table->uobject = NULL; + atomic_set(&rwq_ind_table->usecnt, 0); + + for (i = 0; i < table_size; i++) + atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); + + return rwq_ind_table; +} +EXPORT_SYMBOL(ib_create_rwq_ind_table); + +/* + * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. + * @wq_ind_table: The Indirection Table to destroy. +*/ +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) +{ + int err, i; + u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); + struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; + + if (atomic_read(&rwq_ind_table->usecnt)) + return -EBUSY; + + err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + if (!err) { + for (i = 0; i < table_size; i++) + atomic_dec(&ind_tbl[i]->usecnt); + } + + return err; +} +EXPORT_SYMBOL(ib_destroy_rwq_ind_table); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0c1956a98573..fa2e0184dcc5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1473,6 +1473,21 @@ struct ib_wq_attr { enum ib_wq_state curr_wq_state; }; +struct ib_rwq_ind_table { + struct ib_device *device; + struct ib_uobject *uobject; + atomic_t usecnt; + u32 ind_tbl_num; + u32 log_ind_tbl_size; + struct ib_wq **ind_tbl; +}; + +struct ib_rwq_ind_table_init_attr { + u32 log_ind_tbl_size; + /* Each entry is a pointer to Receive Work Queue */ + struct ib_wq **ind_tbl; +}; + struct ib_qp { struct ib_device *device; struct ib_pd *pd; @@ -1974,6 +1989,10 @@ struct ib_device { struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); + struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -3224,6 +3243,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, int ib_destroy_wq(struct ib_wq *wq); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr* + wq_ind_table_init_attr); +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); -- cgit v1.2.3 From de019a94049d579608a5511f8c50652faf125182 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:52 +0300 Subject: IB/uverbs: Introduce RWQ Indirection table User applications that want to spread traffic on several WQs, need to create an indirection table, by using already created WQs. Adding uverbs API in order to create and destroy this table. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 3 + drivers/infiniband/core/uverbs_cmd.c | 210 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 13 +++ include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_verbs.h | 26 +++++ 5 files changed, 253 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 74776c6531f4..6c2292397cd6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -186,6 +186,7 @@ extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; extern struct idr ib_uverbs_wq_idr; +extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -284,5 +285,7 @@ IB_UVERBS_DECLARE_EX_CMD(create_qp); IB_UVERBS_DECLARE_EX_CMD(create_wq); IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); +IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 22e617391657..327a56cccc27 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -58,6 +58,7 @@ static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; +static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -338,6 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->wq_list); + INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -3299,6 +3301,214 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, return ret; } +int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; + struct ib_uobject *uobj; + int err = 0; + struct ib_rwq_ind_table_init_attr init_attr = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_wq **wqs = NULL; + u32 *wqs_handles = NULL; + struct ib_wq *wq = NULL; + int i, j, num_read_wqs; + u32 num_wq_handles; + u32 expected_in_size; + size_t required_cmd_sz_header; + size_t required_resp_len; + + required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); + required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); + + if (ucore->inlen < required_cmd_sz_header) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + if (err) + return err; + + ucore->inbuf += required_cmd_sz_header; + ucore->inlen -= required_cmd_sz_header; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) + return -EINVAL; + + num_wq_handles = 1 << cmd.log_ind_tbl_size; + expected_in_size = num_wq_handles * sizeof(__u32); + if (num_wq_handles == 1) + /* input size for wq handles is u64 aligned */ + expected_in_size += sizeof(__u32); + + if (ucore->inlen < expected_in_size) + return -EINVAL; + + if (ucore->inlen > expected_in_size && + !ib_is_udata_cleared(ucore, expected_in_size, + ucore->inlen - expected_in_size)) + return -EOPNOTSUPP; + + wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), + GFP_KERNEL); + if (!wqs_handles) + return -ENOMEM; + + err = ib_copy_from_udata(wqs_handles, ucore, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); + if (!wqs) { + err = -ENOMEM; + goto err_free; + } + + for (num_read_wqs = 0; num_read_wqs < num_wq_handles; + num_read_wqs++) { + wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + if (!wq) { + err = -EINVAL; + goto put_wqs; + } + + wqs[num_read_wqs] = wq; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto put_wqs; + } + + init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); + down_write(&uobj->mutex); + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + if (IS_ERR(rwq_ind_tbl)) { + err = PTR_ERR(rwq_ind_tbl); + goto err_uobj; + } + + rwq_ind_tbl->ind_tbl = wqs; + rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; + rwq_ind_tbl->uobject = uobj; + uobj->object = rwq_ind_tbl; + rwq_ind_tbl->device = ib_dev; + atomic_set(&rwq_ind_tbl->usecnt, 0); + + for (i = 0; i < num_wq_handles; i++) + atomic_inc(&wqs[i]->usecnt); + + err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + if (err) + goto destroy_ind_tbl; + + resp.ind_tbl_handle = uobj->id; + resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; + resp.response_length = required_resp_len; + + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + kfree(wqs_handles); + + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); +destroy_ind_tbl: + ib_destroy_rwq_ind_table(rwq_ind_tbl); +err_uobj: + put_uobj_write(uobj); +put_wqs: + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); +err_free: + kfree(wqs_handles); + kfree(wqs); + return err; +} + +int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_uobject *uobj; + int ret; + struct ib_wq **ind_tbl; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + rwq_ind_tbl = uobj->object; + ind_tbl = rwq_ind_tbl->ind_tbl; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + kfree(ind_tbl); + return ret; +} + int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 91cb36f66ea7..426e0ac203fc 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -77,6 +77,7 @@ DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); DEFINE_IDR(ib_uverbs_wq_idr); +DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -134,6 +135,8 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, + [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -269,6 +272,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { + struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { struct ib_wq *wq = uobj->object; struct ib_uwq_object *uwq = diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fa2e0184dcc5..e305c9a36663 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1326,6 +1326,7 @@ struct ib_ucontext { struct list_head xrcd_list; struct list_head rule_list; struct list_head wq_list; + struct list_head rwq_ind_tbl_list; int closing; struct pid *tgid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index c9470e511e03..2cf7c95860d8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -98,6 +98,8 @@ enum { IB_USER_VERBS_EX_CMD_CREATE_WQ, IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, + IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL }; /* @@ -987,4 +989,28 @@ struct ib_uverbs_ex_modify_wq { __u32 curr_wq_state; }; +/* Prevent memory allocation rather than max expected size */ +#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d +struct ib_uverbs_ex_create_rwq_ind_table { + __u32 comp_mask; + __u32 log_ind_tbl_size; + /* Following are the wq handles according to log_ind_tbl_size + * wq_handle1 + * wq_handle2 + */ + __u32 wq_handles[0]; +}; + +struct ib_uverbs_ex_create_rwq_ind_table_resp { + __u32 comp_mask; + __u32 response_length; + __u32 ind_tbl_handle; + __u32 ind_tbl_num; +}; + +struct ib_uverbs_ex_destroy_rwq_ind_table { + __u32 comp_mask; + __u32 ind_tbl_handle; +}; + #endif /* IB_USER_VERBS_H */ -- cgit v1.2.3 From a9017e232ff9eaabeb50eb89841d99310cfc98dc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:54 +0300 Subject: IB/core: Extend create QP to get indirection table Extend create QP to get Receive Work Queue (WQ) indirection table. QP can be created with external Receive Work Queue indirection table, in that case it is ready to receive immediately. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 19 +++++++++++++++++-- include/rdma/ib_verbs.h | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6b548d7f8753..6916d5c5920b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -754,6 +754,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp *qp; int ret; + if (qp_init_attr->rwq_ind_tbl && + (qp_init_attr->recv_cq || + qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || + qp_init_attr->cap.max_recv_sge)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -771,6 +777,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; atomic_set(&qp->usecnt, 0); qp->mrs_used = 0; @@ -788,7 +795,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->srq = NULL; } else { qp->recv_cq = qp_init_attr->recv_cq; - atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->recv_cq) + atomic_inc(&qp_init_attr->recv_cq->usecnt); qp->srq = qp_init_attr->srq; if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); @@ -799,7 +807,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = NULL; atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->send_cq) + atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->rwq_ind_tbl) + atomic_inc(&qp->rwq_ind_tbl->usecnt); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); @@ -1279,6 +1290,7 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; + struct ib_rwq_ind_table *ind_tbl; int ret; WARN_ON_ONCE(qp->mrs_used > 0); @@ -1293,6 +1305,7 @@ int ib_destroy_qp(struct ib_qp *qp) scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + ind_tbl = qp->rwq_ind_tbl; if (!qp->uobject) rdma_rw_cleanup_mrs(qp); @@ -1307,6 +1320,8 @@ int ib_destroy_qp(struct ib_qp *qp) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); + if (ind_tbl) + atomic_dec(&ind_tbl->usecnt); } return ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e305c9a36663..9b2fafe5eb38 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1017,6 +1017,7 @@ struct ib_qp_init_attr { * Only needed for special QP types, or when using the RW API. */ u8 port_num; + struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_qp_open_attr { @@ -1511,6 +1512,7 @@ struct ib_qp { void *qp_context; u32 qp_num; enum ib_qp_type qp_type; + struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_mr { -- cgit v1.2.3 From c70285f880e88cb4f73effb722065a182ba5936f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:55 +0300 Subject: IB/uverbs: Extend create QP to get RWQ indirection table User applications that want to spread incoming traffic between several WQs should create a QP which contains an indirection table. When such a QP is created other receive side parameters are not valid and should not be given. Its send side is optional and assumed active based on max_send_wr capability value. Extend create QP to work accordingly. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 75 ++++++++++++++++++++++++++++++------ include/uapi/rdma/ib_user_verbs.h | 10 +++++ 2 files changed, 73 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 327a56cccc27..65ab2097cd81 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -255,6 +255,17 @@ static void put_wq_read(struct ib_wq *wq) put_uobj_read(wq->uobject); } +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); +} + +static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +{ + put_uobj_read(ind_table->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -1761,9 +1772,11 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; - struct ib_qp_init_attr attr; + struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; + struct ib_rwq_ind_table *ind_tbl = NULL; + bool has_sq = true; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; @@ -1775,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); + if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + + sizeof(cmd->rwq_ind_tbl_handle) && + (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, + file->ucontext); + if (!ind_tbl) { + ret = -EINVAL; + goto err_put; + } + + attr.rwq_ind_tbl = ind_tbl; + } + + if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + + sizeof(cmd->reserved1)) && cmd->reserved1) { + ret = -EOPNOTSUPP; + goto err_put; + } + + if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { + ret = -EINVAL; + goto err_put; + } + + if (ind_tbl && !cmd->max_send_wr) + has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, @@ -1798,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file, } } - if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); - if (!rcq) { - ret = -EINVAL; - goto err_put; + if (!ind_tbl) { + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } } - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); - rcq = rcq ?: scq; + if (has_sq) + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + if (!ind_tbl) + rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd || !scq) { + if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; } @@ -1878,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; + qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); + if (ind_tbl) + atomic_inc(&ind_tbl->usecnt); } qp->uobject = &obj->uevent.uobject; @@ -1927,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file, put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1954,6 +2003,8 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); put_uobj_write(&obj->uevent.uobject); return ret; @@ -2047,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (err) return err; - if (cmd.comp_mask) + if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; if (cmd.reserved) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 2cf7c95860d8..2c8bca8cd2c2 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -523,6 +523,14 @@ struct ib_uverbs_create_qp { __u64 driver_data[0]; }; +enum ib_uverbs_create_qp_mask { + IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, +}; + +enum { + IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, +}; + struct ib_uverbs_ex_create_qp { __u64 user_handle; __u32 pd_handle; @@ -540,6 +548,8 @@ struct ib_uverbs_ex_create_qp { __u8 reserved; __u32 comp_mask; __u32 create_flags; + __u32 rwq_ind_tbl_handle; + __u32 reserved1; }; struct ib_uverbs_open_qp { -- cgit v1.2.3 From 4c2aae712cb024f9d30a1fa62e3ba2ff785c6a3e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 17 Jun 2016 15:14:50 +0300 Subject: IB/core: Add IPv6 support to flow steering Add IPv6 flow specification support. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 9 +++++++++ include/rdma/ib_verbs.h | 14 ++++++++++++++ include/uapi/rdma/ib_user_verbs.h | 18 ++++++++++++++++++ 4 files changed, 42 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 6c2292397cd6..b7f3b8d359b9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -226,6 +226,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 65ab2097cd81..f6647318138d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3105,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, sizeof(struct ib_flow_ipv4_filter)); break; + case IB_FLOW_SPEC_IPV6: + ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); + if (ib_spec->ipv6.size != kern_spec->ipv6.size) + return -EINVAL; + memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, + sizeof(struct ib_flow_ipv6_filter)); + memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, + sizeof(struct ib_flow_ipv6_filter)); + break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b2fafe5eb38..9bbca6887f7f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1569,6 +1569,7 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_IB = 0x22, /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, + IB_FLOW_SPEC_IPV6 = 0x31, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41 @@ -1630,6 +1631,18 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv4_filter mask; }; +struct ib_flow_ipv6_filter { + u8 src_ip[16]; + u8 dst_ip[16]; +}; + +struct ib_flow_spec_ipv6 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv6_filter val; + struct ib_flow_ipv6_filter mask; +}; + struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; @@ -1651,6 +1664,7 @@ union ib_flow_spec { struct ib_flow_spec_ib ib; struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; + struct ib_flow_spec_ipv6 ipv6; }; struct ib_flow_attr { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 2c8bca8cd2c2..7f035f4b53b0 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -867,6 +867,24 @@ struct ib_uverbs_flow_spec_tcp_udp { struct ib_uverbs_flow_tcp_udp_filter mask; }; +struct ib_uverbs_flow_ipv6_filter { + __u8 src_ip[16]; + __u8 dst_ip[16]; +}; + +struct ib_uverbs_flow_spec_ipv6 { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_ipv6_filter val; + struct ib_uverbs_flow_ipv6_filter mask; +}; + struct ib_uverbs_flow_attr { __u32 type; __u16 size; -- cgit v1.2.3 From 5fa76c20458518ed6181adddef2e31c5afc0745c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:21:56 -0400 Subject: IB/core: Add get FW version string to the core Allow for a common core function to get firmware version strings from the individual devices. In later patches this format can then then be used to pass a properly formated version string through the IPoIB layer. The problem with the current code in the IPoIB layer is that it is specific to certain hardware types. Furthermore, this gives us a common function through which the core can provide a common sysfs entry. Eventually we may want to remove the sysfs export but this provides for user space backwards compatibility. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 9 +++++++++ include/rdma/ib_verbs.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5c155fa91eec..760ef603a468 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device) return 0; } +void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +{ + if (dev->get_dev_fw_str) + dev->get_dev_fw_str(dev, str, str_len); + else + str[0] = '\0'; +} +EXPORT_SYMBOL(ib_get_device_fw_str); + /** * ib_register_device - Register an IB device with IB core * @device:Device to register diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e440d41487a..1dc3d0d90202 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1956,6 +1956,7 @@ struct ib_device { * in fast paths. */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); + void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); }; struct ib_client { @@ -1991,6 +1992,8 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); +void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); + int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); -- cgit v1.2.3 From 41a6ae1ebd51d074a43d608b8ecfc9dd2b323d5e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:07 -0400 Subject: IB/core: Export a common fw_ver sysfs entry Now that all the devices have stopped exporting their own sysfs entry points we can have the core export this on their behalf. Eventually this may be removed but this provides for backwards compatibility. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a5793c8f1590..0d1ab73f8186 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -1196,16 +1197,28 @@ static ssize_t set_node_desc(struct device *device, return count; } +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + ib_get_device_fw_str(dev, buf, PAGE_SIZE); + strlcat(buf, "\n", PAGE_SIZE); + return strlen(buf); +} + static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, - &dev_attr_node_desc + &dev_attr_node_desc, + &dev_attr_fw_ver, }; static void free_port_list_attributes(struct ib_device *device) -- cgit v1.2.3 From cea05eadded0d4eb59f7be6e1f1560eb6bfde2bf Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Thu, 28 Jul 2016 15:02:26 -0500 Subject: IB/core: Add flow control to the portmapper netlink calls During connection establishment with a large number of connections, it is possible that the connection requests might fail. Adding flow control prevents this failure. Change ibnl_unicast to use blocking to enable flow control. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/core/netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 9b8c20c8209b..10469b0088b5 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -229,7 +229,10 @@ static void ibnl_rcv(struct sk_buff *skb) int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid) { - return nlmsg_unicast(nls, skb, pid); + int err; + + err = netlink_unicast(nls, skb, pid, 0); + return (err < 0) ? err : 0; } EXPORT_SYMBOL(ibnl_unicast); @@ -252,6 +255,7 @@ int __init ibnl_init(void) return -ENOMEM; } + nls->sk_sndtimeo = 10 * HZ; return 0; } -- cgit v1.2.3 From 59c68ac31e15ad09d2cb04734e3c8c544a95f8d4 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 29 Jul 2016 11:00:54 -0700 Subject: iw_cm: free cm_id resources on the last deref Remove the complicated logic to free the iw_cm_id inside iw_cm event handlers vs when an application thread destroys the cm_id. Also remove the block in iw_destroy_cm_id() to block the application until all references are removed. This block can cause a deadlock when disconnecting or destroying cm_ids inside an rdma_cm event handler. Simply allowing the last deref of the iw_cm_id to free the memory is cleaner and avoids this potential deadlock. Also a flag is added, IW_CM_DROP_EVENTS, that is set when the cm_id is marked for destruction. If any events are pending on this iw_cm_id, then as they are processed they will be dropped vs posted upstream if IW_CM_DROP_EVENTS is set. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 54 +++++++++++++----------------------------- drivers/infiniband/core/iwcm.h | 2 +- 2 files changed, 18 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index f0572049d291..357624f8b9d3 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv) /* * Release a reference on cm_id. If the last reference is being - * released, enable the waiting thread (in iw_destroy_cm_id) to - * get woken up, and return 1 if a thread is already waiting. + * released, free the cm_id and return 1. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { BUG_ON(atomic_read(&cm_id_priv->refcount)==0); if (atomic_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); - complete(&cm_id_priv->destroy_comp); + free_cm_id(cm_id_priv); return 1; } @@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id) static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; - int cb_destroy; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - /* - * Test bit before deref in case the cm_id gets freed on another - * thread. - */ - cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv) && cb_destroy) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); @@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + /* + * Since we're deleting the cm_id, drop any events that + * might arrive before the last dereference. + */ + set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: @@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id) struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); - destroy_cm_id(cm_id); - - wait_for_completion(&cm_id_priv->destroy_comp); - - free_cm_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(cm_id); - if (atomic_read(&cm_id_priv->refcount)==0) - free_cm_id(cm_id_priv); + iw_destroy_cm_id(cm_id); } out: @@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work) unsigned long flags; int empty; int ret = 0; - int destroy_id; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); @@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work) put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = process_event(cm_id_priv, &levent); - if (ret) { - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(&cm_id_priv->id); - } - BUG_ON(atomic_read(&cm_id_priv->refcount)==0); - destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv)) { - if (destroy_id) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { + ret = process_event(cm_id_priv, &levent); + if (ret) + destroy_cm_id(&cm_id_priv->id); + } else + pr_debug("dropping event %d\n", levent.event); + if (iwcm_deref_id(cm_id_priv)) return; - } if (empty) return; spin_lock_irqsave(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h index 3f6cc82564c8..82c2cd1b0a80 100644 --- a/drivers/infiniband/core/iwcm.h +++ b/drivers/infiniband/core/iwcm.h @@ -56,7 +56,7 @@ struct iwcm_id_private { struct list_head work_free_list; }; -#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_DROP_EVENTS 1 #define IWCM_F_CONNECT_WAIT 2 #endif /* IWCM_H */ -- cgit v1.2.3 From e972dabf9c38f7051d3dbaa48d8e77cfbe3a1baa Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 18 Jul 2016 14:21:49 -0500 Subject: Use smaller 512 byte messages for portmapper messages Portmapper messages are short and do not occupy more than 512 bytes. Lower portmapper message size to 512 bytes. This change significantly reduces the amount of memory needed when trying to establish a large number of connections simultaneously. The old value is based on page size. Signed-off-by: Faisal Latif Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index b65e06c560d7..ade71e7f0131 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -37,6 +37,7 @@ #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) #define IWPM_REMINFO_HASH_SIZE 64 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) +#define IWPM_MSG_SIZE 512 static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -452,7 +453,7 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, { struct sk_buff *skb = NULL; - skb = dev_alloc_skb(NLMSG_GOODSIZE); + skb = dev_alloc_skb(IWPM_MSG_SIZE); if (!skb) { pr_err("%s Unable to allocate skb\n", __func__); goto create_nlmsg_exit; -- cgit v1.2.3 From d1e09f304a1d9651c5059ebfeb696dc2effc9b32 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 3 Jul 2016 15:28:18 +0300 Subject: IB/uverbs: Fix race between uverbs_close and remove_one Fixes an oops that might happen if uverbs_close races with remove_one. Both contexts may run ib_uverbs_cleanup_ucontext, it depends on the flow. Currently, there is no protection for a case that remove_one didn't make the cleanup it runs to its end, the underlying ib_device was freed then uverbs_close will call ib_uverbs_cleanup_ucontext and OOPs. Above might happen if uverbs_close deleted the file from the list then remove_one didn't find it and runs to its end. Fixes to protect against that case by a new cleanup lock so that ib_uverbs_cleanup_ucontext will be called always before that remove_one is ended. Fixes: 35d4a0b63dc0 ("IB/uverbs: Fix race between ib_uverbs_open and remove_one") Reported-by: Devesh Sharma Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 37 +++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 612ccfd39bf9..9245e55debed 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -116,6 +116,7 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; struct mutex mutex; + struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 31f422a70623..09d515763ad6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -931,6 +931,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); + mutex_init(&file->cleanup_mutex); filp->private_data = file; kobject_get(&dev->kobj); @@ -956,18 +957,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; - struct ib_ucontext *ucontext = NULL; + + mutex_lock(&file->cleanup_mutex); + if (file->ucontext) { + ib_uverbs_cleanup_ucontext(file, file->ucontext); + file->ucontext = NULL; + } + mutex_unlock(&file->cleanup_mutex); mutex_lock(&file->device->lists_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; if (!file->is_closed) { list_del(&file->list); file->is_closed = 1; } mutex_unlock(&file->device->lists_mutex); - if (ucontext) - ib_uverbs_cleanup_ucontext(file, ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); @@ -1181,22 +1184,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { struct ib_ucontext *ucontext; - file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; - ucontext = file->ucontext; list_del(&file->list); - file->ucontext = NULL; kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); - /* We must release the mutex before going ahead and calling - * disassociate_ucontext. disassociate_ucontext might end up - * indirectly calling uverbs_close, for example due to freeing - * the resources (e.g mmput). - */ + ib_uverbs_event_handler(&file->event_handler, &event); + + mutex_lock(&file->cleanup_mutex); + ucontext = file->ucontext; + file->ucontext = NULL; + mutex_unlock(&file->cleanup_mutex); + + /* At this point ib_uverbs_close cannot be running + * ib_uverbs_cleanup_ucontext + */ if (ucontext) { + /* We must release the mutex before going ahead and + * calling disassociate_ucontext. disassociate_ucontext + * might end up indirectly calling uverbs_close, + * for example due to freeing the resources + * (e.g mmput). + */ ib_dev->disassociate_ucontext(ucontext); ib_uverbs_cleanup_ucontext(file, ucontext); } -- cgit v1.2.3 From 3d3fd74239db7a6f7e07a762f5ee7140215aafc5 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 6 Jul 2016 16:36:34 +0300 Subject: IB/sa: Add cached attribute containing SM information to SA port Added a new SA port attribute containing SM ClassPortInfo fields, (ClassPortInfo fields: Table 126 IB Spec 1.3.). This is useful for checking SM support for specific features. The attribute is cached to avoid resending queries, caching is done when a successful ClassPortInfo reply is received on the port. Invalidation of the attribute is done on SM change events, SM re-registration events, and SM LID change events. The fields in ClassPortInfo should not change during SM runtime without an event. Signed-off-by: Alex Vesker Reviewed by: Hal Rosenstock Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e95538650dc6..b9bf7aa055e7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -65,10 +65,17 @@ struct ib_sa_sm_ah { u8 src_path_mask; }; +struct ib_sa_classport_cache { + bool valid; + struct ib_class_port_info data; +}; + struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; + struct ib_sa_classport_cache classport_info; + spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u8 port_num; }; @@ -998,6 +1005,13 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); + if (event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_LID_CHANGE) { + spin_lock_irqsave(&port->classport_lock, flags); + port->classport_info.valid = false; + spin_unlock_irqrestore(&port->classport_lock, flags); + } queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } @@ -1719,6 +1733,7 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { + unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); @@ -1728,6 +1743,16 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, ib_unpack(classport_info_rec_table, ARRAY_SIZE(classport_info_rec_table), mad->data, &rec); + + spin_lock_irqsave(&sa_query->port->classport_lock, flags); + if (!status && !sa_query->port->classport_info.valid) { + memcpy(&sa_query->port->classport_info.data, &rec, + sizeof(sa_query->port->classport_info.data)); + + sa_query->port->classport_info.valid = true; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); + query->callback(status, &rec, query->context); } else { query->callback(status, NULL, query->context); @@ -1754,7 +1779,9 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; + struct ib_class_port_info cached_class_port_info; int ret; + unsigned long flags; if (!sa_dev) return -ENODEV; @@ -1762,6 +1789,17 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; + /* Use cached ClassPortInfo attribute if valid instead of sending mad */ + spin_lock_irqsave(&port->classport_lock, flags); + if (port->classport_info.valid && callback) { + memcpy(&cached_class_port_info, &port->classport_info.data, + sizeof(cached_class_port_info)); + spin_unlock_irqrestore(&port->classport_lock, flags); + callback(0, &cached_class_port_info, context); + return 0; + } + spin_unlock_irqrestore(&port->classport_lock, flags); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -1885,6 +1923,9 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; + spin_lock_init(&sa_dev->port[i].classport_lock); + sa_dev->port[i].classport_info.valid = false; + sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, -- cgit v1.2.3 From ab15c95a17b3fe8c0e01bb7ce1dd0b657598eb61 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 6 Jul 2016 16:36:35 +0300 Subject: IB/core: Support for CMA multicast join flags Added UCMA and CMA support for multicast join flags. Flags are passed using UCMA CM join command previously reserved fields. Currently supporting two join flags indicating two different multicast JoinStates: 1. Full Member: The initiator creates the Multicast group(MCG) if it wasn't previously created, can send Multicast messages to the group and receive messages from the MCG. 2. Send Only Full Member: The initiator creates the Multicast group(MCG) if it wasn't previously created, can send Multicast messages to the group but doesn't receive any messages from the MCG. IB: Send Only Full Member requires a query of ClassPortInfo to determine if SM/SA supports this option. If SM/SA doesn't support Send-Only there will be no join request sent and an error will be returned. ETH: When Send Only Full Member is requested no IGMP join will be sent. Signed-off-by: Alex Vesker Reviewed by: Hal Rosenstock Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 100 +++++++++++++++++++++++++++++++++--- drivers/infiniband/core/multicast.c | 12 ----- drivers/infiniband/core/ucma.c | 18 +++++-- include/rdma/ib_sa.h | 13 +++++ include/rdma/rdma_cm.h | 4 +- include/uapi/rdma/rdma_user_cm.h | 9 +++- 6 files changed, 131 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..0451307bea18 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,6 +68,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 @@ -162,6 +163,14 @@ struct rdma_bind_list { unsigned short port; }; +struct class_port_info_context { + struct ib_class_port_info *class_port_info; + struct ib_device *device; + struct completion done; + struct ib_sa_query *sa_query; + u8 port_num; +}; + static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { @@ -306,6 +315,7 @@ struct cma_multicast { struct sockaddr_storage addr; struct kref mcref; bool igmp_joined; + u8 join_state; }; struct cma_work { @@ -3754,10 +3764,63 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, } } +static void cma_query_sa_classport_info_cb(int status, + struct ib_class_port_info *rec, + void *context) +{ + struct class_port_info_context *cb_ctx = context; + + WARN_ON(!context); + + if (status || !rec) { + pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", + cb_ctx->device->name, cb_ctx->port_num, status); + goto out; + } + + memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); + +out: + complete(&cb_ctx->done); +} + +static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, + struct ib_class_port_info *class_port_info) +{ + struct class_port_info_context *cb_ctx; + int ret; + + cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); + if (!cb_ctx) + return -ENOMEM; + + cb_ctx->device = device; + cb_ctx->class_port_info = class_port_info; + cb_ctx->port_num = port_num; + init_completion(&cb_ctx->done); + + ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, + CMA_QUERY_CLASSPORT_INFO_TIMEOUT, + GFP_KERNEL, cma_query_sa_classport_info_cb, + cb_ctx, &cb_ctx->sa_query); + if (ret < 0) { + pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", + device->name, port_num, ret); + goto out; + } + + wait_for_completion(&cb_ctx->done); + +out: + kfree(cb_ctx); + return ret; +} + static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; + struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; @@ -3776,7 +3839,24 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - rec.join_state = 1; + rec.join_state = mc->join_state; + + if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { + ret = cma_query_sa_classport_info(id_priv->id.device, + id_priv->id.port_num, + &class_port_info); + + if (ret) + return ret; + + if (!(ib_get_cpi_capmask2(&class_port_info) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" + "RDMA CM: SM doesn't support Send Only Full Member option\n", + id_priv->id.device->name, id_priv->id.port_num); + return -EOPNOTSUPP; + } + } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | @@ -3845,6 +3925,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; enum ib_gid_type gid_type; + bool send_only; + + send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; @@ -3878,12 +3961,14 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, - true); - if (!err) { - mc->igmp_joined = true; + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!send_only) { + err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, + true); + if (!err) + mc->igmp_joined = true; + } } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -3913,7 +3998,7 @@ out1: } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - void *context) + u8 join_state, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; @@ -3932,6 +4017,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, mc->context = context; mc->id_priv = id_priv; mc->igmp_joined = false; + mc->join_state = join_state; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index a83ec28a147b..3a3c5d73bbfc 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -93,18 +93,6 @@ enum { struct mcast_member; -/* -* There are 4 types of join states: -* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. -*/ -enum { - FULLMEMBER_JOIN, - NONMEMBER_JOIN, - SENDONLY_NONMEBER_JOIN, - SENDONLY_FULLMEMBER_JOIN, - NUM_JOIN_MEMBERSHIP_TYPES, -}; - struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index c0f3826abb30..2825ece91d3c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -106,6 +106,7 @@ struct ucma_multicast { int events_reported; u64 uid; + u8 join_state; struct list_head list; struct sockaddr_storage addr; }; @@ -1317,12 +1318,20 @@ static ssize_t ucma_process_join(struct ucma_file *file, struct ucma_multicast *mc; struct sockaddr *addr; int ret; + u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; - if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; + + if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) + join_state = BIT(FULLMEMBER_JOIN); + else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) + join_state = BIT(SENDONLY_FULLMEMBER_JOIN); + else return -EINVAL; ctx = ucma_get_ctx(file, cmd->id); @@ -1335,10 +1344,11 @@ static ssize_t ucma_process_join(struct ucma_file *file, ret = -ENOMEM; goto err1; } - + mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); + ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, + join_state, mc); if (ret) goto err2; @@ -1382,7 +1392,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file, join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); - join_cmd.reserved = 0; + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 384041669489..5ee7aab95eb8 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -94,6 +94,19 @@ enum ib_sa_selector { IB_SA_BEST = 3 }; +/* + * There are 4 types of join states: + * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. + * The order corresponds to JoinState bits in MCMemberRecord. + */ +enum ib_sa_mc_join_states { + FULLMEMBER_JOIN, + NONMEMBER_JOIN, + SENDONLY_NONMEBER_JOIN, + SENDONLY_FULLMEMBER_JOIN, + NUM_JOIN_MEMBERSHIP_TYPES, +}; + #define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) /* diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index afe44fde72a5..81fb1d15e8bb 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -333,11 +333,13 @@ int rdma_disconnect(struct rdma_cm_id *id); * address. * @id: Communication identifier associated with the request. * @addr: Multicast address identifying the group to join. + * @join_state: Multicast JoinState bitmap requested by port. + * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits. * @context: User-defined context associated with the join request, returned * to the user through the private_data pointer in multicast events. */ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - void *context); + u8 join_state, void *context); /** * rdma_leave_multicast - Leave the multicast group specified by the given diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 3066718eb120..01923d463673 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -244,12 +244,19 @@ struct rdma_ucm_join_ip_mcast { __u32 id; }; +/* Multicast join flags */ +enum { + RDMA_MC_JOIN_FLAG_FULLMEMBER, + RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, + RDMA_MC_JOIN_FLAG_RESERVED, +}; + struct rdma_ucm_join_mcast { __u64 response; /* rdma_ucma_create_id_resp */ __u64 uid; __u32 id; __u16 addr_size; - __u16 reserved; + __u16 join_flags; struct sockaddr_storage addr; }; -- cgit v1.2.3