diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 21:03:32 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 21:03:32 +0100 |
commit | 4d5b57e05a67c3cfd8e2b2a64ca356245a15b1c6 (patch) | |
tree | d8f3ea3bc3ccfe289f414bbe9a4bdd1e935d9228 /drivers/infiniband | |
parent | Merge tag 'devicetree-for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/... (diff) | |
parent | Merge branch 'vmw_pvrdma' into merge-test (diff) | |
download | linux-4d5b57e05a67c3cfd8e2b2a64ca356245a15b1c6.tar.xz linux-4d5b57e05a67c3cfd8e2b2a64ca356245a15b1c6.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"This is the complete update for the rdma stack for this release cycle.
Most of it is typical driver and core updates, but there is the
entirely new VMWare pvrdma driver. You may have noticed that there
were changes in DaveM's pull request to the bnxt Ethernet driver to
support a RoCE RDMA driver. The bnxt_re driver was tentatively set to
be pulled in this release cycle, but it simply wasn't ready in time
and was dropped (a few review comments still to address, and some
multi-arch build issues like prefetch() not working across all
arches).
Summary:
- shared mlx5 updates with net stack (will drop out on merge if
Dave's tree has already been merged)
- driver updates: cxgb4, hfi1, hns-roce, i40iw, mlx4, mlx5, qedr, rxe
- debug cleanups
- new connection rejection helpers
- SRP updates
- various misc fixes
- new paravirt driver from vmware"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (210 commits)
IB: Add vmw_pvrdma driver
IB/mlx4: fix improper return value
IB/ocrdma: fix bad initialization
infiniband: nes: return value of skb_linearize should be handled
MAINTAINERS: Update Intel RDMA RNIC driver maintainers
MAINTAINERS: Remove Mitesh Ahuja from emulex maintainers
IB/core: fix unmap_sg argument
qede: fix general protection fault may occur on probe
IB/mthca: Replace pci_pool_alloc by pci_pool_zalloc
mlx5, calc_sq_size(): Make a debug message more informative
mlx5: Remove a set-but-not-used variable
mlx5: Use { } instead of { 0 } to init struct
IB/srp: Make writing the add_target sysfs attr interruptible
IB/srp: Make mapping failures easier to debug
IB/srp: Make login failures easier to debug
IB/srp: Introduce a local variable in srp_add_one()
IB/srp: Fix CONFIG_DYNAMIC_DEBUG=n build
IB/multicast: Check ib_find_pkey() return value
IPoIB: Avoid reading an uninitialized member variable
IB/mad: Fix an array index check
...
Diffstat (limited to 'drivers/infiniband')
180 files changed, 11640 insertions, 2638 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index fb3fb89640e5..670917387eda 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -73,6 +73,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" +source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 4fa524dfb6cf..11dacd97a667 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -156,7 +156,6 @@ int ib_agent_port_open(struct ib_device *device, int port_num) /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { - dev_err(&device->dev, "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 1a2984c28b95..ae04826e82fc 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -770,12 +770,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev) int err = 0; table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); - - if (!table) { - pr_warn("failed to allocate ib gid cache for %s\n", - ib_dev->name); + if (!table) return -ENOMEM; - } for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); @@ -1170,14 +1166,13 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - pr_warn("Couldn't allocate cache for %s\n", device->name); - return -ENOMEM; + err = -ENOMEM; + goto free; } err = gid_table_setup_one(device); if (err) - /* Allocated memory will be cleaned in the release function */ - return err; + goto free; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); @@ -1192,6 +1187,9 @@ int ib_cache_setup_one(struct ib_device *device) err: gid_table_cleanup_one(device); +free: + kfree(device->cache.pkey_cache); + kfree(device->cache.lmc_cache); return err; } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 71c7c4c328ef..cf1edfa1cbac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -57,6 +57,54 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +static const char * const ibcm_rej_reason_strs[] = { + [IB_CM_REJ_NO_QP] = "no QP", + [IB_CM_REJ_NO_EEC] = "no EEC", + [IB_CM_REJ_NO_RESOURCES] = "no resources", + [IB_CM_REJ_TIMEOUT] = "timeout", + [IB_CM_REJ_UNSUPPORTED] = "unsupported", + [IB_CM_REJ_INVALID_COMM_ID] = "invalid comm ID", + [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance", + [IB_CM_REJ_INVALID_SERVICE_ID] = "invalid service ID", + [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type", + [IB_CM_REJ_STALE_CONN] = "stale conn", + [IB_CM_REJ_RDC_NOT_EXIST] = "RDC not exist", + [IB_CM_REJ_INVALID_GID] = "invalid GID", + [IB_CM_REJ_INVALID_LID] = "invalid LID", + [IB_CM_REJ_INVALID_SL] = "invalid SL", + [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class", + [IB_CM_REJ_INVALID_HOP_LIMIT] = "invalid hop limit", + [IB_CM_REJ_INVALID_PACKET_RATE] = "invalid packet rate", + [IB_CM_REJ_INVALID_ALT_GID] = "invalid alt GID", + [IB_CM_REJ_INVALID_ALT_LID] = "invalid alt LID", + [IB_CM_REJ_INVALID_ALT_SL] = "invalid alt SL", + [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class", + [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit", + [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate", + [IB_CM_REJ_PORT_CM_REDIRECT] = "port CM redirect", + [IB_CM_REJ_PORT_REDIRECT] = "port redirect", + [IB_CM_REJ_INVALID_MTU] = "invalid MTU", + [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources", + [IB_CM_REJ_CONSUMER_DEFINED] = "consumer defined", + [IB_CM_REJ_INVALID_RNR_RETRY] = "invalid RNR retry", + [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID", + [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version", + [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label", + [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label", +}; + +const char *__attribute_const__ ibcm_reject_msg(int reason) +{ + size_t index = reason; + + if (index < ARRAY_SIZE(ibcm_rej_reason_strs) && + ibcm_rej_reason_strs[index]) + return ibcm_rej_reason_strs[index]; + else + return "unrecognized reason"; +} +EXPORT_SYMBOL(ibcm_reject_msg); + static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); @@ -1582,6 +1630,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; + struct ib_cm_id *cm_id; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1603,10 +1652,18 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); + if (cur_cm_id_priv) { + cm_id = &cur_cm_id_priv->id; + ib_send_cm_dreq(cm_id, NULL, 0); + cm_deref_id(cur_cm_id_priv); + } return NULL; } @@ -1984,6 +2041,9 @@ static int cm_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; int ret; + struct cm_id_private *cur_cm_id_priv; + struct ib_cm_id *cm_id; + struct cm_timewait_info *timewait_info; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); @@ -2018,16 +2078,26 @@ static int cm_rep_handler(struct cm_work *work) goto error; } /* Check for a stale connection. */ - if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); + if (timewait_info) { rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; + if (cur_cm_id_priv) { + cm_id = &cur_cm_id_priv->id; + ib_send_cm_dreq(cm_id, NULL, 0); + cm_deref_id(cur_cm_id_priv); + } + goto error; } spin_unlock(&cm.lock); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 22fcf284dd8b..e7dcfac877ca 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -101,6 +101,49 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) } EXPORT_SYMBOL(rdma_event_msg); +const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, + int reason) +{ + if (rdma_ib_or_roce(id->device, id->port_num)) + return ibcm_reject_msg(reason); + + if (rdma_protocol_iwarp(id->device, id->port_num)) + return iwcm_reject_msg(reason); + + WARN_ON_ONCE(1); + return "unrecognized transport"; +} +EXPORT_SYMBOL(rdma_reject_msg); + +bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) +{ + if (rdma_ib_or_roce(id->device, id->port_num)) + return reason == IB_CM_REJ_CONSUMER_DEFINED; + + if (rdma_protocol_iwarp(id->device, id->port_num)) + return reason == -ECONNREFUSED; + + WARN_ON_ONCE(1); + return false; +} +EXPORT_SYMBOL(rdma_is_consumer_reject); + +const void *rdma_consumer_reject_data(struct rdma_cm_id *id, + struct rdma_cm_event *ev, u8 *data_len) +{ + const void *p; + + if (rdma_is_consumer_reject(id, ev->status)) { + *data_len = ev->param.conn.private_data_len; + p = ev->param.conn.private_data; + } else { + *data_len = 0; + p = NULL; + } + return p; +} +EXPORT_SYMBOL(rdma_consumer_reject_data); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0c0bea091de8..d29372624f3a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -72,9 +72,6 @@ void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); - typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 760ef603a468..571974cd3919 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -254,11 +254,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client unsigned long flags; context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) { - pr_warn("Couldn't allocate client context for %s/%s\n", - device->name, client->name); + if (!context) return -ENOMEM; - } context->client = client; context->data = NULL; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index cdbb1f1a6d97..cdfad5f26212 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -247,7 +247,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { - pr_warn(PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 5495e22839a7..31661b5c1743 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -59,6 +59,27 @@ MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); +static const char * const iwcm_rej_reason_strs[] = { + [ECONNRESET] = "reset by remote host", + [ECONNREFUSED] = "refused by remote application", + [ETIMEDOUT] = "setup timeout", +}; + +const char *__attribute_const__ iwcm_reject_msg(int reason) +{ + size_t index; + + /* iWARP uses negative errnos */ + index = -reason; + + if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && + iwcm_rej_reason_strs[index]) + return iwcm_rej_reason_strs[index]; + else + return "unrecognized reason"; +} +EXPORT_SYMBOL(iwcm_reject_msg); + static struct ibnl_client_cbs iwcm_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 1c41b95cefec..a0e7c16d8bd8 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -604,7 +604,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) } rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); if (!rem_info) { - pr_err("%s: Unable to allocate a remote info\n", __func__); ret = -ENOMEM; return ret; } diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index ade71e7f0131..3ef51a96bbf1 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -62,7 +62,6 @@ int iwpm_init(u8 nl_client) sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { ret = -ENOMEM; - pr_err("%s Unable to create mapinfo hash table\n", __func__); goto init_exit; } iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * @@ -70,7 +69,6 @@ int iwpm_init(u8 nl_client) if (!iwpm_reminfo_bucket) { kfree(iwpm_hash_bucket); ret = -ENOMEM; - pr_err("%s Unable to create reminfo hash table\n", __func__); goto init_exit; } } @@ -128,10 +126,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, if (!iwpm_valid_client(nl_client)) return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); - if (!map_info) { - pr_err("%s: Unable to allocate a mapping info\n", __func__); + if (!map_info) return -ENOMEM; - } + memcpy(&map_info->local_sockaddr, local_sockaddr, sizeof(struct sockaddr_storage)); memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, @@ -309,10 +306,9 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, unsigned long flags; nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); - if (!nlmsg_request) { - pr_err("%s Unable to allocate a nlmsg_request\n", __func__); + if (!nlmsg_request) return NULL; - } + spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 40cbd6bdb73b..a009f7132c73 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; if ((opa_get_smp_direction(opa_smp) @@ -816,7 +816,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { ret = -ENOMEM; - dev_err(&device->dev, "No memory for ib_mad_local_private\n"); goto out; } local->mad_priv = NULL; @@ -824,7 +823,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; - dev_err(&device->dev, "No memory for local response MAD\n"); kfree(local); goto out; } @@ -947,9 +945,6 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); if (!seg) { - dev_err(&send_buf->mad_agent->device->dev, - "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", - sizeof (*seg) + seg_size, gfp_mask); free_send_rmpp_list(send_wr); return -ENOMEM; } @@ -1362,12 +1357,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method) { /* Allocate management method table */ *method = kzalloc(sizeof **method, GFP_ATOMIC); - if (!*method) { - pr_err("No memory for ib_mad_mgmt_method_table\n"); - return -ENOMEM; - } - - return 0; + return (*method) ? 0 : (-ENOMEM); } /* @@ -1458,8 +1448,6 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } @@ -1524,22 +1512,16 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); - if (!vendor) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class_table\n"); + if (!vendor) goto error1; - } *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); - if (!vendor_class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class\n"); + if (!vendor_class) goto error2; - } (*vendor_table)->vendor_class[vclass] = vendor_class; } @@ -1746,7 +1728,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; @@ -2167,7 +2149,7 @@ handle_smi(struct ib_mad_port_private *port_priv, struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && - mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + mad_hdr->class_version == OPA_SM_CLASS_VERSION) return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, response); @@ -2238,11 +2220,8 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); - if (!response) { - dev_err(&port_priv->device->dev, - "%s: no memory for response buffer\n", __func__); + if (!response) goto out; - } if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; @@ -2869,8 +2848,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), GFP_ATOMIC); if (!mad_priv) { - dev_err(&qp_info->port_priv->device->dev, - "No memory for receive buffer\n"); ret = -ENOMEM; break; } @@ -2961,11 +2938,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { - dev_err(&port_priv->device->dev, - "Couldn't kmalloc ib_qp_attr\n"); + if (!attr) return -ENOMEM; - } ret = ib_find_pkey(port_priv->device, port_priv->port_num, IB_DEFAULT_PKEY_FULL, &pkey_index); @@ -3135,10 +3109,8 @@ static int ib_mad_port_open(struct ib_device *device, /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); - if (!port_priv) { - dev_err(&device->dev, "No memory for ib_mad_port_private\n"); + if (!port_priv) return -ENOMEM; - } port_priv->device = device; port_priv->port_num = port_num; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index e51b739f6ea3..322cb67b07a9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, process_join_error(group, status); else { int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 3a64a0881882..0621f4455732 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -304,10 +304,9 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, for_ifa(in_dev) { struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) { - pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n"); + if (!entry) continue; - } + entry->ip.sin_family = AF_INET; entry->ip.sin_addr.s_addr = ifa->ifa_address; list_add_tail(&entry->list, &sin_list); @@ -348,10 +347,8 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev, list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) { - pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n"); + if (!entry) continue; - } entry->sin6.sin6_family = AF_INET6; entry->sin6.sin6_addr = ifp->addr; @@ -447,10 +444,8 @@ static int netdev_upper_walk(struct net_device *upper, void *data) struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); struct list_head *upper_list = data; - if (!entry) { - pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); + if (!entry) return 0; - } list_add_tail(&entry->list, upper_list); dev_hold(upper); @@ -559,10 +554,8 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, struct netdev_event_work *ndev_work = kmalloc(sizeof(*ndev_work), GFP_KERNEL); - if (!ndev_work) { - pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n"); + if (!ndev_work) return NOTIFY_DONE; - } memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds)); for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) { @@ -696,10 +689,8 @@ static int addr_event(struct notifier_block *this, unsigned long event, } work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); + if (!work) return NOTIFY_DONE; - } INIT_WORK(&work->work, update_gid_event_work_handler); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 7713ef089c3c..579f9a7f6283 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1104,8 +1104,11 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9520154f1d7c..e12f8faf8c23 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1584,8 +1584,11 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 84b4eff90395..1e62a5f0cb28 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -51,7 +51,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d if (umem->nmap > 0) ib_dma_unmap_sg(dev, umem->sg_head.sgl, - umem->nmap, + umem->npages, DMA_BIDIRECTIONAL); for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index df26a741cda6..455034ac994e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -289,5 +289,6 @@ IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(modify_qp); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cb3f515a2285..09b649159e6c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2328,94 +2328,88 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) } } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int modify_qp(struct ib_uverbs_file *file, + struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) { - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + struct ib_qp_attr *attr; + struct ib_qp *qp; + int ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.alt_pkey_index; - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = cmd.alt_timeout; - - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + attr->qp_state = cmd->base.qp_state; + attr->cur_qp_state = cmd->base.cur_qp_state; + attr->path_mtu = cmd->base.path_mtu; + attr->path_mig_state = cmd->base.path_mig_state; + attr->qkey = cmd->base.qkey; + attr->rq_psn = cmd->base.rq_psn; + attr->sq_psn = cmd->base.sq_psn; + attr->dest_qp_num = cmd->base.dest_qp_num; + attr->qp_access_flags = cmd->base.qp_access_flags; + attr->pkey_index = cmd->base.pkey_index; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + attr->max_rd_atomic = cmd->base.max_rd_atomic; + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + attr->min_rnr_timer = cmd->base.min_rnr_timer; + attr->port_num = cmd->base.port_num; + attr->timeout = cmd->base.timeout; + attr->retry_cnt = cmd->base.retry_cnt; + attr->rnr_retry = cmd->base.rnr_retry; + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->rate_limit = cmd->rate_limit; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class; + attr->ah_attr.dlid = cmd->base.dest.dlid; + attr->ah_attr.sl = cmd->base.dest.sl; + attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits; + attr->ah_attr.static_rate = cmd->base.dest.static_rate; + attr->ah_attr.ah_flags = cmd->base.dest.is_global ? + IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd->base.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd->base.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ? + IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); - if (ret) - goto release_qp; + if (cmd->base.attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto release_qp; + } ret = qp->device->modify_qp(qp, attr, - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + udata); } else { - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + ret = ib_modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask)); } - if (ret) - goto release_qp; - - ret = in_len; - release_qp: put_qp_read(qp); @@ -2425,6 +2419,68 @@ out: return ret; } +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + struct ib_udata udata; + int ret; + + if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) + return -EFAULT; + + if (cmd.base.attr_mask & + ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, + in_len - sizeof(cmd.base), out_len); + + ret = modify_qp(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + +int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + int ret; + + /* + * Last bit is reserved for extending the attr_mask by + * using another field. + */ + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + + if (ucore->inlen < sizeof(cmd.base)) + return -EINVAL; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.base.attr_mask & + ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + if (ucore->inlen > sizeof(cmd)) { + if (ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + } + + ret = modify_qp(file, &cmd, uhw); + + return ret; +} + ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -2875,6 +2931,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct ib_ah_attr attr; int ret; + struct ib_udata udata; if (out_len < sizeof resp) return -ENOSPC; @@ -2882,6 +2939,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -2908,12 +2969,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - ah = ib_create_ah(pd, &attr); + ah = pd->device->create_ah(pd, &attr, &udata); + if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } + ah->device = pd->device; + ah->pd = pd; + atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->object = ah; @@ -3124,8 +3189,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); kern_spec_mask = kern_spec_val + kern_filter_sz; + if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL)) + return -EINVAL; - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, @@ -3175,6 +3242,21 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel); + memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) || + (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24)) + return -EINVAL; + break; default: return -EINVAL; } @@ -3745,7 +3827,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = PTR_ERR(flow_id); goto err_free; } - flow_id->qp = qp; flow_id->uobject = uobj; uobj->object = flow_id; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 44b1104eb168..813593550c4b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -137,6 +137,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -746,8 +747,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, int srcu_key; ssize_t ret; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 83687646da68..71580cc28c9e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr); + ah = pd->device->create_ah(pd, ah_attr, NULL); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -328,7 +328,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -static int ib_get_header_version(const union rdma_network_hdr *hdr) +int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; struct iphdr ip4h_checked; @@ -359,6 +359,7 @@ static int ib_get_header_version(const union rdma_network_hdr *hdr) return 4; return 6; } +EXPORT_SYMBOL(ib_get_rdma_header_version); static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u8 port_num, @@ -369,7 +370,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, if (rdma_protocol_ib(device, port_num)) return RDMA_NETWORK_IB; - grh_version = ib_get_header_version((union rdma_network_hdr *)grh); + grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; @@ -415,9 +416,9 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, &context, gid_index); } -static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, - enum rdma_network_type net_type, - union ib_gid *sgid, union ib_gid *dgid) +int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, + enum rdma_network_type net_type, + union ib_gid *sgid, union ib_gid *dgid) { struct sockaddr_in src_in; struct sockaddr_in dst_in; @@ -447,6 +448,7 @@ static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, return -EINVAL; } } +EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, @@ -469,8 +471,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } - ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, - &sgid, &dgid); + ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + &sgid, &dgid); if (ret) return ret; @@ -1014,6 +1016,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } } }, @@ -1047,6 +1050,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } }, [IB_QPS_SQD] = { @@ -1196,66 +1200,66 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_device *device, + struct ib_ah_attr *ah_attr) { int ret = 0; - if (*qp_attr_mask & IB_QP_AV) { - if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || - qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) - return -EINVAL; - - if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) - return 0; - - if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, - qp_attr->ah_attr.dmac); - } else { - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - int ifindex; - int hop_limit; - - ret = ib_query_gid(qp->device, - qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, - &sgid, &sgid_attr); - - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - goto out; - } + if (ah_attr->port_num < rdma_start_port(device) || + ah_attr->port_num > rdma_end_port(device)) + return -EINVAL; - ifindex = sgid_attr.ndev->ifindex; + if (!rdma_cap_eth_ah(device, ah_attr->port_num)) + return 0; - ret = rdma_addr_find_l2_eth_by_grh(&sgid, - &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, - NULL, &ifindex, &hop_limit); + if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { + rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw, + ah_attr->dmac); + } else { + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + int hop_limit; + + ret = ib_query_gid(device, + ah_attr->port_num, + ah_attr->grh.sgid_index, + &sgid, &sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; + goto out; + } - dev_put(sgid_attr.ndev); + ifindex = sgid_attr.ndev->ifindex; - qp_attr->ah_attr.grh.hop_limit = hop_limit; - } + ret = rdma_addr_find_l2_eth_by_grh(&sgid, + &ah_attr->grh.dgid, + ah_attr->dmac, + NULL, &ifindex, &hop_limit); + + dev_put(sgid_attr.ndev); + + ah_attr->grh.hop_limit = hop_limit; } out: return ret; } EXPORT_SYMBOL(ib_resolve_eth_dmac); - int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - int ret; - ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); - if (ret) - return ret; + if (qp_attr_mask & IB_QP_AV) { + int ret; + + ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); + if (ret) + return ret; + } return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } @@ -1734,8 +1738,10 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, return ERR_PTR(-ENOSYS); flow_id = qp->device->create_flow(qp, flow_attr, domain); - if (!IS_ERR(flow_id)) + if (!IS_ERR(flow_id)) { atomic_inc(&qp->usecnt); + flow_id->qp = qp; + } return flow_id; } EXPORT_SYMBOL(ib_create_flow); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index e7a5ed9f6f3f..ed553de2ca12 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ +obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_HNS) += hns/ diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c index 8bca6b4ec9af..445e89e5e7cf 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c +++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c @@ -45,10 +45,9 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag) int size = 32; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; m->len = size; @@ -82,10 +81,9 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift) size = npages * sizeof(u64); m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = pbl_addr; m->len = size; @@ -144,10 +142,9 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents) int rc; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; m->len = size; @@ -177,10 +174,9 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid) int rc; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_CM; m->addr = hwtid * size; m->len = size; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index cba57bb53dba..9d5fe1853da4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -62,7 +62,8 @@ #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4e5baf4fe15e..516b0ae6dc3f 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -828,8 +828,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } rdev->status_page = (struct t4_dev_status_page *) __get_free_page(GFP_KERNEL); - if (!rdev->status_page) + if (!rdev->status_page) { + err = -ENOMEM; goto destroy_ocqp_pool; + } rdev->status_page->qp_start = rdev->lldi.vr->qp.start; rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; @@ -841,8 +843,6 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) if (rdev->wr_log) { rdev->wr_log_size = 1 << c4iw_wr_log_size_order; atomic_set(&rdev->wr_log_idx, 0); - } else { - pr_err(MOD "error allocating wr_log. Logging disabled\n"); } } @@ -1424,8 +1424,6 @@ static void recover_queues(struct uld_ctx *ctx) qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); if (!qp_list.qps) { - printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", - pci_name(ctx->lldi.pdev)); spin_unlock_irq(&ctx->dev->lock); return; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c5..49b51b7e0fd7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -59,7 +59,9 @@ module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 67ea85a56945..7a3d906b3671 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -125,6 +125,7 @@ int node_affinity_init(void) cpumask_weight(topology_sibling_cpumask( cpumask_first(&node_affinity.proc.mask) )); + node_affinity.num_possible_nodes = num_possible_nodes(); node_affinity.num_online_nodes = num_online_nodes(); node_affinity.num_online_cpus = num_online_cpus(); @@ -135,7 +136,7 @@ int node_affinity_init(void) */ init_real_cpu_mask(); - hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes, sizeof(*hfi1_per_node_cntr), GFP_KERNEL); if (!hfi1_per_node_cntr) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 42e63316afd1..e78c7aa094e0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -70,14 +70,6 @@ struct cpu_mask_set { uint gen; }; -struct hfi1_affinity { - struct cpu_mask_set def_intr; - struct cpu_mask_set rcv_intr; - struct cpumask real_cpu_mask; - /* spin lock to protect affinity struct */ - spinlock_t lock; -}; - struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ @@ -115,6 +107,7 @@ struct hfi1_affinity_node_list { struct cpumask real_cpu_mask; struct cpu_mask_set proc; int num_core_siblings; + int num_possible_nodes; int num_online_nodes; int num_online_cpus; struct mutex lock; /* protects affinity nodes */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 24d0820873cf..ef72bc2a9e1d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8477,7 +8477,10 @@ static int do_8051_command( */ if (type == HCMD_WRITE_LCB_CSR) { in_data |= ((*out_data) & 0xffffffffffull) << 8; - reg = ((((*out_data) >> 40) & 0xff) << + /* must preserve COMPLETED - it is tied to hardware */ + reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0); + reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK; + reg |= ((((*out_data) >> 40) & 0xff) << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT) | ((((*out_data) >> 48) & 0xffff) << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); @@ -9556,11 +9559,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd) if (HFI1_CAP_IS_KSET(EXTENDED_PSN)) add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK); - guid = ppd->guid; + guid = ppd->guids[HFI1_PORT_GUID_INDEX]; if (!guid) { if (dd->base_guid) guid = dd->base_guid + ppd->port - 1; - ppd->guid = guid; + ppd->guids[HFI1_PORT_GUID_INDEX] = guid; } /* Set linkinit_reason on power up per OPA spec */ diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5b9993899789..5bfa839d1c48 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -415,6 +415,9 @@ #define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32 #define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0 #define ASIC_CFG_SCRATCH (ASIC + 0x000000000020) +#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08) +#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10) +#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18) #define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050) #define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308) #define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 632ba21759ab..8725f4c086cf 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, return ret; } +/* read the dc8051 memory */ +static ssize_t dc8051_memory_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + ssize_t rval; + void *tmp; + loff_t start, end; + + /* the checks below expect the position to be positive */ + if (*ppos < 0) + return -EINVAL; + + tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* + * Fill in the requested portion of the temporary buffer from the + * 8051 memory. The 8051 memory read is done in terms of 8 bytes. + * Adjust start and end to fit. Skip reading anything if out of + * range. + */ + start = *ppos & ~0x7; /* round down */ + if (start < DC8051_DATA_MEM_SIZE) { + end = (*ppos + count + 7) & ~0x7; /* round up */ + if (end > DC8051_DATA_MEM_SIZE) + end = DC8051_DATA_MEM_SIZE; + rval = read_8051_data(ppd->dd, start, end - start, + (u64 *)(tmp + start)); + if (rval) + goto done; + } + + rval = simple_read_from_buffer(buf, count, ppos, tmp, + DC8051_DATA_MEM_SIZE); +done: + kfree(tmp); + return rval; +} + +static ssize_t debugfs_lcb_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off; + u64 data; + + if (*ppos < 0) + return -EINVAL; + /* only read 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (read_lcb_csr(dd, csr_off, (u64 *)&data)) + break; /* failed */ + if (put_user(data, (unsigned long __user *)(buf + total))) + break; + } + *ppos += total; + return total; +} + +static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off, data; + + if (*ppos < 0) + return -EINVAL; + /* only write 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (get_user(data, (unsigned long __user *)(buf + total))) + break; + if (write_lcb_csr(dd, csr_off, data)) + break; /* failed */ + } + *ppos += total; + return total; +} + /* * read the per-port QSFP data for ppd */ @@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), + DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), + DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), }; static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c5efff29c147..4fbaee68012b 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -795,8 +795,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index e70c223801b4..26da124c88e2 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -207,6 +207,40 @@ done_asic: /* magic character sequence that trails an image */ #define IMAGE_TRAIL_MAGIC "egamiAPO" +/* EPROM file types */ +#define HFI1_EFT_PLATFORM_CONFIG 2 + +/* segment size - 128 KiB */ +#define SEG_SIZE (128 * 1024) + +struct hfi1_eprom_footer { + u32 oprom_size; /* size of the oprom, in bytes */ + u16 num_table_entries; + u16 version; /* version of this footer */ + u32 magic; /* must be last */ +}; + +struct hfi1_eprom_table_entry { + u32 type; /* file type */ + u32 offset; /* file offset from start of EPROM */ + u32 size; /* file size, in bytes */ +}; + +/* + * Calculate the max number of table entries that will fit within a directory + * buffer of size 'dir_size'. + */ +#define MAX_TABLE_ENTRIES(dir_size) \ + (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \ + sizeof(struct hfi1_eprom_table_entry)) + +#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \ + (sizeof(struct hfi1_eprom_table_entry) * (n))) + +#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a)) +#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm') +#define FOOTER_VERSION 1 + /* * Read all of partition 1. The actual file is at the front. Adjust * the returned size if a trailing image magic is found. @@ -242,6 +276,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, } /* + * The segment magic has been checked. There is a footer and table of + * contents present. + * + * directory is a u32 aligned buffer of size EP_PAGE_SIZE. + */ +static int read_segment_platform_config(struct hfi1_devdata *dd, + void *directory, void **data, u32 *size) +{ + struct hfi1_eprom_footer *footer; + struct hfi1_eprom_table_entry *table; + struct hfi1_eprom_table_entry *entry; + void *buffer = NULL; + void *table_buffer = NULL; + int ret, i; + u32 directory_size; + u32 seg_base, seg_offset; + u32 bytes_available, ncopied, to_copy; + + /* the footer is at the end of the directory */ + footer = (struct hfi1_eprom_footer *) + (directory + EP_PAGE_SIZE - sizeof(*footer)); + + /* make sure the structure version is supported */ + if (footer->version != FOOTER_VERSION) + return -EINVAL; + + /* oprom size cannot be larger than a segment */ + if (footer->oprom_size >= SEG_SIZE) + return -EINVAL; + + /* the file table must fit in a segment with the oprom */ + if (footer->num_table_entries > + MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size)) + return -EINVAL; + + /* find the file table start, which precedes the footer */ + directory_size = DIRECTORY_SIZE(footer->num_table_entries); + if (directory_size <= EP_PAGE_SIZE) { + /* the file table fits into the directory buffer handed in */ + table = (struct hfi1_eprom_table_entry *) + (directory + EP_PAGE_SIZE - directory_size); + } else { + /* need to allocate and read more */ + table_buffer = kmalloc(directory_size, GFP_KERNEL); + if (!table_buffer) + return -ENOMEM; + ret = read_length(dd, SEG_SIZE - directory_size, + directory_size, table_buffer); + if (ret) + goto done; + table = table_buffer; + } + + /* look for the platform configuration file in the table */ + for (entry = NULL, i = 0; i < footer->num_table_entries; i++) { + if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) { + entry = &table[i]; + break; + } + } + if (!entry) { + ret = -ENOENT; + goto done; + } + + /* + * Sanity check on the configuration file size - it should never + * be larger than 4 KiB. + */ + if (entry->size > (4 * 1024)) { + dd_dev_err(dd, "Bad configuration file size 0x%x\n", + entry->size); + ret = -EINVAL; + goto done; + } + + /* check for bogus offset and size that wrap when added together */ + if (entry->offset + entry->size < entry->offset) { + dd_dev_err(dd, + "Bad configuration file start + size 0x%x+0x%x\n", + entry->offset, entry->size); + ret = -EINVAL; + goto done; + } + + /* allocate the buffer to return */ + buffer = kmalloc(entry->size, GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + goto done; + } + + /* + * Extract the file by looping over segments until it is fully read. + */ + seg_offset = entry->offset % SEG_SIZE; + seg_base = entry->offset - seg_offset; + ncopied = 0; + while (ncopied < entry->size) { + /* calculate data bytes available in this segment */ + + /* start with the bytes from the current offset to the end */ + bytes_available = SEG_SIZE - seg_offset; + /* subtract off footer and table from segment 0 */ + if (seg_base == 0) { + /* + * Sanity check: should not have a starting point + * at or within the directory. + */ + if (bytes_available <= directory_size) { + dd_dev_err(dd, + "Bad configuration file - offset 0x%x within footer+table\n", + entry->offset); + ret = -EINVAL; + goto done; + } + bytes_available -= directory_size; + } + + /* calculate bytes wanted */ + to_copy = entry->size - ncopied; + + /* max out at the available bytes in this segment */ + if (to_copy > bytes_available) + to_copy = bytes_available; + + /* + * Read from the EPROM. + * + * The sanity check for entry->offset is done in read_length(). + * The EPROM offset is validated against what the hardware + * addressing supports. In addition, if the offset is larger + * than the actual EPROM, it silently wraps. It will work + * fine, though the reader may not get what they expected + * from the EPROM. + */ + ret = read_length(dd, seg_base + seg_offset, to_copy, + buffer + ncopied); + if (ret) + goto done; + + ncopied += to_copy; + + /* set up for next segment */ + seg_offset = footer->oprom_size; + seg_base += SEG_SIZE; + } + + /* success */ + ret = 0; + *data = buffer; + *size = entry->size; + +done: + kfree(table_buffer); + if (ret) + kfree(buffer); + return ret; +} + +/* * Read the platform configuration file from the EPROM. * * On success, an allocated buffer containing the data and its size are @@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, * -EBUSY - not able to acquire access to the EPROM * -ENOENT - no recognizable file written * -ENOMEM - buffer could not be allocated + * -EINVAL - invalid EPROM contentents found */ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) { @@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) if (ret) return -EBUSY; - /* read the last page of P0 for the EPROM format magic */ - ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + /* read the last page of the segment for the EPROM format magic */ + ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); if (ret) goto done; - /* last dword of P0 contains a magic indicator */ - if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* last dword of the segment contains a magic value */ + if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) { + /* segment format */ + ret = read_segment_platform_config(dd, directory, data, size); + } else { /* partition format */ ret = read_partition_platform_config(dd, data, size); - goto done; } - /* nothing recognized */ - ret = -ENOENT; - done: release_chip_resource(dd, CR_EPROM); return ret; diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 13db8eb4f4ec..0dd50cdb039a 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1; const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 }; static const u8 all_pcie_serdes_broadcast = 0xe0; +static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { + 0, + SYSTEM_TABLE_MAX, + PORT_TABLE_MAX, + RX_PRESET_TABLE_MAX, + TX_PRESET_TABLE_MAX, + QSFP_ATTEN_TABLE_MAX, + VARIABLE_SETTINGS_TABLE_MAX +}; + /* forwards */ static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, @@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result) u64 reg; int count; - /* start the read at the given address */ - reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) - << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT) - | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK; + /* step 1: set the address, clear enable */ + reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) + << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT; write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg); + /* step 2: enable */ + write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, + reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK); /* wait until ACCESS_COMPLETED is set */ count = 0; @@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd) &dd->pcidev->dev); if (err) { platform_config = NULL; + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); goto done; } dd->platform_config.data = platform_config->data; @@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd) u32 record_idx = 0, table_type = 0, table_length_dwords = 0; int ret = -EINVAL; /* assume failure */ + /* + * For integrated devices that did not fall back to the default file, + * the SI tuning information for active channels is acquired from the + * scratch register bitmap, thus there is no platform config to parse. + * Skip parsing in these situations. + */ + if (is_integrated(dd) && !platform_config_load) + return 0; + if (!dd->platform_config.data) { - dd_dev_info(dd, "%s: Missing config file\n", __func__); + dd_dev_err(dd, "%s: Missing config file\n", __func__); goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd) magic_num = *ptr; ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { - dd_dev_info(dd, "%s: Bad config file\n", __func__); + dd_dev_err(dd, "%s: Bad config file\n", __func__); goto bail; } @@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd) header1 = *ptr; header2 = *(ptr + 1); if (header1 != ~header2) { - dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed validation at offset %ld\n", + __func__, (ptr - (u32 *) + dd->platform_config.data)); goto bail; } @@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd) table_length_dwords; break; default: - dd_dev_info(dd, - "%s: Unknown data table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown data table %d, offset %ld\n", + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd) case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: - dd_dev_info(dd, - "%s: Unknown meta table %d, offset %ld\n", - __func__, table_type, - (ptr - - (u32 *)dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown meta table %d, offset %ld\n", + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = @@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Jump the table */ ptr += table_length_dwords; if (crc != *ptr) { - dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n", - __func__, (ptr - - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n", + __func__, (ptr - + (u32 *)dd->platform_config.data)); goto bail; } /* Jump the CRC DWORD */ @@ -1901,6 +1924,84 @@ bail: return ret; } +static void get_integrated_platform_config_field( + struct hfi1_devdata *dd, + enum platform_config_table_type_encoding table_type, + int field_index, u32 *data) +{ + struct hfi1_pportdata *ppd = dd->pport; + u8 *cache = ppd->qsfp_info.cache; + u32 tx_preset = 0; + + switch (table_type) { + case PLATFORM_CONFIG_SYSTEM_TABLE: + if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX) + *data = ppd->max_power_class; + else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G) + *data = ppd->default_atten; + break; + case PLATFORM_CONFIG_PORT_TABLE: + if (field_index == PORT_TABLE_PORT_TYPE) + *data = ppd->port_type; + else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G) + *data = ppd->local_atten; + else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G) + *data = ppd->remote_atten; + break; + case PLATFORM_CONFIG_RX_PRESET_TABLE: + if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY) + *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >> + QSFP_RX_CDR_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >> + QSFP_RX_EMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >> + QSFP_RX_AMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR) + *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >> + QSFP_RX_CDR_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP) + *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >> + QSFP_RX_EMP_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP) + *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >> + QSFP_RX_AMP_SHIFT; + break; + case PLATFORM_CONFIG_TX_PRESET_TABLE: + if (cache[QSFP_EQ_INFO_OFFS] & 0x4) + tx_preset = ppd->tx_preset_eq; + else + tx_preset = ppd->tx_preset_noeq; + if (field_index == TX_PRESET_TABLE_PRECUR) + *data = (tx_preset & TX_PRECUR_SMASK) >> + TX_PRECUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_ATTN) + *data = (tx_preset & TX_ATTN_SMASK) >> + TX_ATTN_SHIFT; + else if (field_index == TX_PRESET_TABLE_POSTCUR) + *data = (tx_preset & TX_POSTCUR_SMASK) >> + TX_POSTCUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY) + *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >> + QSFP_TX_CDR_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY) + *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >> + QSFP_TX_EQ_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR) + *data = (tx_preset & QSFP_TX_CDR_SMASK) >> + QSFP_TX_CDR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ) + *data = (tx_preset & QSFP_TX_EQ_SMASK) >> + QSFP_TX_EQ_SHIFT; + break; + case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: + case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: + default: + break; + } +} + static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, int field, u32 *field_len_bits, u32 *field_start_bits) @@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd, else return -EINVAL; + if (is_integrated(dd) && !platform_config_load) { + /* + * Use saved configuration from ppd for integrated platforms + */ + get_integrated_platform_config_field(dd, table_type, + field_index, data); + return 0; + } + ret = get_platform_fw_field_metadata(dd, table_type, field_index, &field_len_bits, &field_start_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cc87fd4e534b..751a0fb29fa5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -492,6 +492,9 @@ struct rvt_sge_state; #define HFI1_MIN_VLS_SUPPORTED 1 #define HFI1_MAX_VLS_SUPPORTED 8 +#define HFI1_GUIDS_PER_PORT 5 +#define HFI1_PORT_GUID_INDEX 0 + static inline void incr_cntr64(u64 *cntr) { if (*cntr < (u64)-1LL) @@ -559,11 +562,20 @@ struct hfi1_pportdata { struct kobject vl2mtu_kobj; /* PHY support */ - u32 port_type; struct qsfp_data qsfp_info; + /* Values for SI tuning of SerDes */ + u32 port_type; + u32 tx_preset_eq; + u32 tx_preset_noeq; + u32 rx_preset; + u8 local_atten; + u8 remote_atten; + u8 default_atten; + u8 max_power_class; + + /* GUIDs for this interface, in host order, guids[0] is a port guid */ + u64 guids[HFI1_GUIDS_PER_PORT]; - /* GUID for this interface, in host order */ - u64 guid; /* GUID for peer interface, in host order */ u64 neighbor_guid; @@ -826,32 +838,29 @@ struct hfi1_devdata { u8 __iomem *kregend; /* physical address of chip for io_remap, etc. */ resource_size_t physaddr; - /* receive context data */ - struct hfi1_ctxtdata **rcd; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ + struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ struct send_context_info *send_contexts; /* map hardware send contexts to software index */ u8 *hw_to_sw; /* spinlock for allocating and releasing send context resources */ spinlock_t sc_lock; - /* Per VL data. Enough for all VLs but not all elements are set/used. */ - struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* lock for pio_map */ spinlock_t pio_map_lock; + /* Send Context initialization lock. */ + spinlock_t sc_init_lock; + /* lock for sdma_map */ + spinlock_t sde_map_lock; /* array of kernel send contexts */ struct send_context **kernel_send_context; /* array of vl maps */ struct pio_vl_map __rcu *pio_map; - /* seqlock for sc2vl */ - seqlock_t sc2vl_lock; - u64 sc2vl[4]; - /* Send Context initialization lock. */ - spinlock_t sc_init_lock; + /* default flags to last descriptor */ + u64 default_desc1; /* fields common to all SDMA engines */ - /* default flags to last descriptor */ - u64 default_desc1; volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ dma_addr_t sdma_heads_phys; void *sdma_pad_dma; /* DMA'ed by chip */ @@ -862,8 +871,6 @@ struct hfi1_devdata { u32 chip_sdma_engines; /* num used */ u32 num_sdma; - /* lock for sdma_map */ - spinlock_t sde_map_lock; /* array of engines sized by num_sdma */ struct sdma_engine *per_sdma; /* array of vl maps */ @@ -872,14 +879,11 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; + u32 lcb_access_count; /* count of LCB users */ + /* common data between shared ASIC HFIs in this OS */ struct hfi1_asic_data *asic_data; - /* hfi1_pportdata, points to array of (physical) port-specific - * data structs, indexed by pidx (0..n-1) - */ - struct hfi1_pportdata *pport; - /* mem-mapped pointer to base of PIO buffers */ void __iomem *piobase; /* @@ -896,20 +900,13 @@ struct hfi1_devdata { /* send context numbers and sizes for each type */ struct sc_config_sizes sc_sizes[SC_MAX]; - u32 lcb_access_count; /* count of LCB users */ - char *boardname; /* human readable board info */ - /* device (not port) flags, basically device capabilities */ - u32 flags; - /* reset value */ u64 z_int_counter; u64 z_rcv_limit; u64 z_send_schedule; - /* percpu int_counter */ - u64 __percpu *int_counter; - u64 __percpu *rcv_limit; + u64 __percpu *send_schedule; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; @@ -924,6 +921,7 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; + u32 freezelen; /* max length of freezemsg */ u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ @@ -945,7 +943,6 @@ struct hfi1_devdata { * IB link status cheaply */ struct hfi1_status *status; - u32 freezelen; /* max length of freezemsg */ /* revision register shadow */ u64 revision; @@ -973,6 +970,8 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; + /* default link down value (poll/sleep) */ + u8 link_default; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1008,8 +1007,6 @@ struct hfi1_devdata { u8 hfi1_id; /* implementation code */ u8 icode; - /* default link down value (poll/sleep) */ - u8 link_default; /* vAU of this device */ u8 vau; /* vCU of this device */ @@ -1020,27 +1017,17 @@ struct hfi1_devdata { u16 vl15_init; /* Misc small ints */ - /* Number of physical ports available */ - u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; u8 n_krcv_queues; u8 qos_shift; - u8 qpn_mask; - u16 rhf_offset; /* offset of RHF within receive header entry */ u16 irev; /* implementation revision */ u16 dc8051_ver; /* 8051 firmware version */ + spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; struct platform_config_cache pcfg_cache; struct diag_client *diag_client; - spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ - - u8 psxmitwait_supported; - /* cycle length of PS* counters in HW (in picoseconds) */ - u16 psxmitwait_check_rate; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1055,6 +1042,9 @@ struct hfi1_devdata { struct rcv_array_data rcv_entries; + /* cycle length of PS* counters in HW (in picoseconds) */ + u16 psxmitwait_check_rate; + /* * 64 bit synthetic counters */ @@ -1085,11 +1075,11 @@ struct hfi1_devdata { struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; - u8 err_info_uncorrectable; - u8 err_info_fmconfig; atomic_t drop_packet; u8 do_drop; + u8 err_info_uncorrectable; + u8 err_info_fmconfig; /* * Software counters for the status bits defined by the @@ -1112,40 +1102,60 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; + /* receive interrupt function */ rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; + /* Save the enabled LCB error bits */ + u64 lcb_err_en; + /* * Capability to have different send engines simply by changing a * pointer value. */ - send_routine process_pio_send; + send_routine process_pio_send ____cacheline_aligned_in_smp; send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + /* hfi1_pportdata, points to array of (physical) port-specific + * data structs, indexed by pidx (0..n-1) + */ + struct hfi1_pportdata *pport; + /* receive context data */ + struct hfi1_ctxtdata **rcd; + u64 __percpu *int_counter; + /* device (not port) flags, basically device capabilities */ + u16 flags; + /* Number of physical ports available */ + u8 num_pports; + /* Lowest context number which can be used by user processes */ + u8 first_user_ctxt; + /* adding a new field here would make it part of this cacheline */ + + /* seqlock for sc2vl */ + seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; + u64 sc2vl[4]; + /* receive interrupt functions */ + rhf_rcv_function_ptr *rhf_rcv_function_map; + u64 __percpu *rcv_limit; + u16 rhf_offset; /* offset of RHF within receive header entry */ + /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ u8 oui1; u8 oui2; u8 oui3; + u8 dc_shutdown; + /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; - u32 rcv_ovfl_cnt; wait_queue_head_t event_queue; - /* Save the enabled LCB error bits */ - u64 lcb_err_en; - u8 dc_shutdown; - /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_dma; - bool eprom_available; /* true if EPROM is available for this device */ - bool aspm_supported; /* Does HW support ASPM */ - bool aspm_enabled; /* ASPM state: enabled/disabled */ + u32 rcv_ovfl_cnt; /* Serialize ASPM enable/disable between multiple verbs contexts */ spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ @@ -1155,8 +1165,11 @@ struct hfi1_devdata { /* Used to wait for outstanding user space clients before dev removal */ struct completion user_comp; - struct hfi1_affinity *affinity; + bool eprom_available; /* true if EPROM is available for this device */ + bool aspm_supported; /* Does HW support ASPM */ + bool aspm_enabled; /* ASPM state: enabled/disabled */ struct rhashtable sdma_rht; + struct kobject kobj; }; @@ -1604,6 +1617,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) } /* + * Return the indexed GUID from the port GUIDs table. + */ +static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + WARN_ON(index >= HFI1_GUIDS_PER_PORT); + return cpu_to_be64(ppd->guids[index]); +} + +/* * Called by readers of cc_state only, must call under rcu_read_lock(). */ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) @@ -1982,6 +2006,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) return i2c_target(dd->hfi1_id); } +/* Is this device integrated or discrete? */ +static inline bool is_integrated(struct hfi1_devdata *dd) +{ + return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..d9740ddea6f1 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -64,6 +64,7 @@ struct sdma_engine; /** * struct iowait - linkage for delayed progress/waiting * @list: used to add/insert into QP/PQ wait lists + * @lock: uses to record the list head lock * @tx_head: overflow list of sdma_txreq's * @sleep: no space callback * @wakeup: space callback wakeup @@ -91,6 +92,11 @@ struct sdma_engine; * so sleeping is not allowed. * * The wait_dma member along with the iow + * + * The lock field is used by waiters to record + * the seqlock_t that guards the list head. + * Waiters explicity know that, but the destroy + * code that unwaits QPs does not. */ struct iowait { @@ -103,6 +109,7 @@ struct iowait { unsigned seq); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); + seqlock_t *lock; struct work_struct iowork; wait_queue_head_t wait_dma; wait_queue_head_t wait_pio; @@ -141,6 +148,7 @@ static inline void iowait_init( void (*sdma_drained)(struct iowait *wait)) { wait->count = 0; + wait->lock = NULL; INIT_LIST_HEAD(&wait->list); INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 9487c9bb8920..6e595afca24c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -128,7 +128,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp = send_buf->mad; smp->base_version = OPA_MGMT_BASE_VERSION; smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - smp->class_version = OPA_SMI_CLASS_VERSION; + smp->class_version = OPA_SM_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; ibp->rvp.tid++; smp->tid = cpu_to_be64(ibp->rvp.tid); @@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, ni = (struct opa_node_info *)data; /* GUID 0 is illegal */ - if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) { + if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - ni->port_guid = cpu_to_be64(dd->pport[pidx].guid); + ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); ni->base_version = OPA_MGMT_BASE_VERSION; - ni->class_version = OPA_SMI_CLASS_VERSION; + ni->class_version = OPA_SM_CLASS_VERSION; ni->node_type = 1; /* channel adapter */ ni->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ ni->system_image_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - ni->node_guid = cpu_to_be64(dd->pport->guid); + ni->node_guid = ibdev->node_guid; ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); ni->device_id = cpu_to_be16(dd->pcidev->device); ni->revision = cpu_to_be32(dd->minrev); @@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, /* GUID 0 is illegal */ if (smp->attr_mod || pidx >= dd->num_pports || - dd->pport[pidx].guid == 0) + ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; - else - nip->port_guid = cpu_to_be64(dd->pport[pidx].guid); + return reply((struct ib_mad_hdr *)smp); + } + nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); nip->base_version = OPA_MGMT_BASE_VERSION; - nip->class_version = OPA_SMI_CLASS_VERSION; + nip->class_version = OPA_SM_CLASS_VERSION; nip->node_type = 1; /* channel adapter */ nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - nip->node_guid = cpu_to_be64(dd->pport->guid); + nip->node_guid = ibdev->node_guid; nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->pcidev->device); nip->revision = cpu_to_be32(dd->minrev); @@ -2302,7 +2303,7 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; p->base_version = OPA_MGMT_BASE_VERSION; - p->class_version = OPA_SMI_CLASS_VERSION; + p->class_version = OPA_SM_CLASS_VERSION; /* * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. */ @@ -4022,7 +4023,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; - if (smp->class_version != OPA_SMI_CLASS_VERSION) { + if (smp->class_version != OPA_SM_CLASS_VERSION) { smp->status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_mad_hdr *)smp); return ret; @@ -4232,7 +4233,7 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, *out_mad = *in_mad; - if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) { + if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) { pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; return reply((struct ib_mad_hdr *)pmp); } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 7ad30898fc19..ccbf52c8ff6f 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler, struct list_head *del_list); static void handle_remove(struct work_struct *work); -static struct mmu_notifier_ops mn_opts = { +static const struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, }; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d89b8745d4c1..615be68e40b3 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -758,6 +758,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->hw_context = hw_context; cr_group_addresses(sc, &dma); sc->credits = sci->credits; + sc->size = sc->credits * PIO_BLOCK_SIZE; /* PIO Send Memory Address details */ #define PIO_ADDR_CONTEXT_MASK 0xfful @@ -1242,6 +1243,7 @@ int sc_enable(struct send_context *sc) sc->free = 0; sc->alloc_free = 0; sc->fill = 0; + sc->fill_wrap = 0; sc->sr_head = 0; sc->sr_tail = 0; sc->flags = 0; @@ -1385,7 +1387,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, unsigned long flags; unsigned long avail; unsigned long blocks = dwords_to_blocks(dw_len); - unsigned long start_fill; + u32 fill_wrap; int trycount = 0; u32 head, next; @@ -1410,9 +1412,7 @@ retry: (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ - spin_unlock_irqrestore(&sc->alloc_lock, flags); sc_release_update(sc); - spin_lock_irqsave(&sc->alloc_lock, flags); sc->alloc_free = ACCESS_ONCE(sc->free); trycount++; goto retry; @@ -1428,8 +1428,11 @@ retry: head = sc->sr_head; /* "allocate" the buffer */ - start_fill = sc->fill; sc->fill += blocks; + fill_wrap = sc->fill_wrap; + sc->fill_wrap += blocks; + if (sc->fill_wrap >= sc->credits) + sc->fill_wrap = sc->fill_wrap - sc->credits; /* * Fill the parts that the releaser looks at before moving the head. @@ -1458,11 +1461,8 @@ retry: spin_unlock_irqrestore(&sc->alloc_lock, flags); /* finish filling in the buffer outside the lock */ - pbuf->start = sc->base_addr + ((start_fill % sc->credits) - * PIO_BLOCK_SIZE); - pbuf->size = sc->credits * PIO_BLOCK_SIZE; - pbuf->end = sc->base_addr + pbuf->size; - pbuf->block_count = blocks; + pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; + pbuf->end = sc->base_addr + sc->size; pbuf->qw_written = 0; pbuf->carry_bytes = 0; pbuf->carry.val64 = 0; @@ -1573,6 +1573,7 @@ static void sc_piobufavail(struct send_context *sc) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ qps[n++] = qp; } @@ -2028,29 +2029,17 @@ freesc15: int init_credit_return(struct hfi1_devdata *dd) { int ret; - int num_numa; int i; - num_numa = num_online_nodes(); - /* enforce the expectation that the numas are compact */ - for (i = 0; i < num_numa; i++) { - if (!node_online(i)) { - dd_dev_err(dd, "NUMA nodes are not compact\n"); - ret = -EINVAL; - goto done; - } - } - dd->cr_base = kcalloc( - num_numa, + node_affinity.num_possible_nodes, sizeof(struct credit_return_base), GFP_KERNEL); if (!dd->cr_base) { - dd_dev_err(dd, "Unable to allocate credit return base\n"); ret = -ENOMEM; goto done; } - for (i = 0; i < num_numa; i++) { + for_each_node_with_cpus(i) { int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); set_dev_node(&dd->pcidev->dev, i); @@ -2077,14 +2066,11 @@ done: void free_credit_return(struct hfi1_devdata *dd) { - int num_numa; int i; if (!dd->cr_base) return; - - num_numa = num_online_nodes(); - for (i = 0; i < num_numa; i++) { + for (i = 0; i < node_affinity.num_possible_nodes; i++) { if (dd->cr_base[i].va) { dma_free_coherent(&dd->pcidev->dev, TXE_NUM_CONTEXTS * diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index e709eaf743b5..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -83,53 +83,55 @@ struct pio_buf { void *arg; /* argument for cb */ void __iomem *start; /* buffer start address */ void __iomem *end; /* context end address */ - unsigned long size; /* context size, in bytes */ unsigned long sent_at; /* buffer is sent when <= free */ - u32 block_count; /* size of buffer, in blocks */ - u32 qw_written; /* QW written so far */ - u32 carry_bytes; /* number of valid bytes in carry */ union mix carry; /* pending unwritten bytes */ + u16 qw_written; /* QW written so far */ + u8 carry_bytes; /* number of valid bytes in carry */ }; /* cache line aligned pio buffer array */ union pio_shadow_ring { struct pio_buf pbuf; - u64 unused[16]; /* cache line spacer */ } ____cacheline_aligned; /* per-NUMA send context */ struct send_context { /* read-only after init */ struct hfi1_devdata *dd; /* device */ - void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + void __iomem *base_addr; /* start of PIO memory */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 size; /* context size, in bytes */ - volatile __le64 *hw_free; /* HW free counter */ - struct work_struct halt_work; /* halted context work queue entry */ - unsigned long flags; /* flags */ int node; /* context home node */ - int type; /* context type */ - u32 sw_index; /* software index number */ - u32 hw_context; /* hardware context number */ - u32 credits; /* number of blocks in context */ u32 sr_size; /* size of the shadow ring */ - u32 group; /* credit return group */ + u16 flags; /* flags */ + u8 type; /* context type */ + u8 sw_index; /* software index number */ + u8 hw_context; /* hardware context number */ + u8 group; /* credit return group */ + /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; + u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 sr_head; /* shadow ring head */ + u32 fill_wrap; /* tracks fill within ring */ + u32 credits; /* number of blocks in context */ + /* adding a new field here would make it part of this cacheline */ + /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; - unsigned long free; /* official free count */ u32 sr_tail; /* shadow ring tail */ + unsigned long free; /* official free count */ + volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; - u64 credit_ctrl; /* cache for credit control */ u32 credit_intr_count; /* count of credit intr users */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ + struct work_struct halt_work; /* halted context work queue entry */ }; /* send context flags */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index aa7773643107..03024cec78dd 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we've wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 202433178864..838fe84e285a 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -49,6 +49,90 @@ #include "efivar.h" #include "eprom.h" +static int validate_scratch_checksum(struct hfi1_devdata *dd) +{ + u64 checksum = 0, temp_scratch = 0; + int i, j, version; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; + + /* Prevent power on default of all zeroes from passing checksum */ + if (!version) + return 0; + + /* + * ASIC scratch 0 only contains the checksum and bitmap version as + * fields of interest, both of which are handled separately from the + * loop below, so skip it + */ + checksum += version; + for (i = 1; i < ASIC_NUM_SCRATCH; i++) { + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i)); + for (j = sizeof(u64); j != 0; j -= 2) { + checksum += (temp_scratch & 0xFFFF); + temp_scratch >>= 16; + } + } + + while (checksum >> 16) + checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16); + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + temp_scratch &= CHECKSUM_SMASK; + temp_scratch >>= CHECKSUM_SHIFT; + + if (checksum + temp_scratch == 0xFFFF) + return 1; + return 0; +} + +static void save_platform_config_fields(struct hfi1_devdata *dd) +{ + struct hfi1_pportdata *ppd = dd->pport; + u64 temp_scratch = 0, temp_dest = 0; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK : + PORT0_PORT_TYPE_SMASK); + ppd->port_type = temp_dest >> + (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT : + PORT0_PORT_TYPE_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK : + PORT0_LOCAL_ATTEN_SMASK); + ppd->local_atten = temp_dest >> + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT : + PORT0_LOCAL_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK : + PORT0_REMOTE_ATTEN_SMASK); + ppd->remote_atten = temp_dest >> + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT : + PORT0_REMOTE_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK : + PORT0_DEFAULT_ATTEN_SMASK); + ppd->default_atten = temp_dest >> + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT : + PORT0_DEFAULT_ATTEN_SHIFT); + + temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 : + ASIC_CFG_SCRATCH_2); + + ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT; + ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT; + ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT; + + ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> + QSFP_MAX_POWER_SHIFT; +} + void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; @@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd) u8 *temp_platform_config = NULL; u32 esize; - ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, - &esize); - if (!ret) { - /* success */ - size = esize; - goto success; + if (is_integrated(dd)) { + if (validate_scratch_checksum(dd)) { + save_platform_config_fields(dd); + return; + } + dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", + __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); + } else { + ret = eprom_read_platform_config(dd, + (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = esize; + return; + } + /* fail, try EFI variable */ + + ret = read_hfi1_efi_var(dd, "configuration", &size, + (void **)&temp_platform_config); + if (!ret) { + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = size; + return; + } } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) - goto success; - - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); + dd_dev_err(dd, + "%s: Failed to get platform config, falling back to sub-optimal default file\n", + __func__); /* fall back to request firmware */ platform_config_load = 1; - return; - -success: - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; } void free_platform_config(struct hfi1_devdata *dd) { if (!platform_config_load) { /* - * was loaded from EFI, release memory - * allocated by read_efi_var + * was loaded from EFI or the EPROM, release memory + * allocated by read_efi_var/eprom_read_platform_config */ kfree(dd->platform_config.data); } @@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd) void get_port_type(struct hfi1_pportdata *ppd) { int ret; + u32 temp; ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, + PORT_TABLE_PORT_TYPE, &temp, 4); - if (ret) + if (ret) { ppd->port_type = PORT_TYPE_UNKNOWN; + return; + } + ppd->port_type = temp; } int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) @@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, } } +/* + * Return a special SerDes setting for low power AOC cables. The power class + * threshold and setting being used were all found by empirical testing. + * + * Summary of the logic: + * + * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4) + * return 0xe + * return 0; // leave at default + */ +static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) +{ + u8 *cache = ppd->qsfp_info.cache; + int power_class; + + /* QSFP only */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; /* leave at default */ + + /* active optical cables only */ + switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { + case 0x0 ... 0x9: /* fallthrough */ + case 0xC: /* fallthrough */ + case 0xE: + /* active AOC */ + power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); + if (power_class < QSFP_POWER_CLASS_4) + return 0xe; + } + return 0; /* leave at default */ +} + static void apply_tunings( struct hfi1_pportdata *ppd, u32 tx_preset_index, u8 tuning_method, u32 total_atten, u8 limiting_active) @@ -606,7 +737,17 @@ static void apply_tunings( tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4); postcur = tx_preset; - config_data = precur | (attn << 8) | (postcur << 16); + /* + * NOTES: + * o The aoc_low_power_setting is applied to all lanes even + * though only lane 0's value is examined by the firmware. + * o A lingering low power setting after a cable swap does + * not occur. On cable unplug the 8051 is reset and + * restarted on cable insert. This resets all settings to + * their default, erasing any previous low power setting. + */ + config_data = precur | (attn << 8) | (postcur << 16) | + (aoc_low_power_setting(ppd) << 24); apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data, "Applying TX settings"); diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index e2c21613c326..eed0aa9124fa 100644 --- a/drivers/infiniband/hw/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -168,16 +168,6 @@ struct platform_config_cache { struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; }; -static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { - 0, - SYSTEM_TABLE_MAX, - PORT_TABLE_MAX, - RX_PRESET_TABLE_MAX, - TX_PRESET_TABLE_MAX, - QSFP_ATTEN_TABLE_MAX, - VARIABLE_SETTINGS_TABLE_MAX -}; - /* This section defines default values and encodings for the * fields defined for each table above */ @@ -295,6 +285,123 @@ enum link_tuning_encoding { OPA_UNKNOWN_TUNING }; +/* + * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch + * registers for integrated platforms + */ +#define PORT0_PORT_TYPE_SHIFT 0 +#define PORT0_LOCAL_ATTEN_SHIFT 4 +#define PORT0_REMOTE_ATTEN_SHIFT 10 +#define PORT0_DEFAULT_ATTEN_SHIFT 32 + +#define PORT1_PORT_TYPE_SHIFT 16 +#define PORT1_LOCAL_ATTEN_SHIFT 20 +#define PORT1_REMOTE_ATTEN_SHIFT 26 +#define PORT1_DEFAULT_ATTEN_SHIFT 40 + +#define PORT0_PORT_TYPE_MASK 0xFUL +#define PORT0_LOCAL_ATTEN_MASK 0x3FUL +#define PORT0_REMOTE_ATTEN_MASK 0x3FUL +#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT1_PORT_TYPE_MASK 0xFUL +#define PORT1_LOCAL_ATTEN_MASK 0x3FUL +#define PORT1_REMOTE_ATTEN_MASK 0x3FUL +#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \ + PORT0_PORT_TYPE_SHIFT) +#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \ + PORT0_LOCAL_ATTEN_SHIFT) +#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \ + PORT0_REMOTE_ATTEN_SHIFT) +#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \ + PORT0_DEFAULT_ATTEN_SHIFT) + +#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \ + PORT1_PORT_TYPE_SHIFT) +#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \ + PORT1_LOCAL_ATTEN_SHIFT) +#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \ + PORT1_REMOTE_ATTEN_SHIFT) +#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \ + PORT1_DEFAULT_ATTEN_SHIFT) + +#define QSFP_MAX_POWER_SHIFT 0 +#define TX_NO_EQ_SHIFT 4 +#define TX_EQ_SHIFT 25 +#define RX_SHIFT 46 + +#define QSFP_MAX_POWER_MASK 0xFUL +#define TX_NO_EQ_MASK 0x1FFFFFUL +#define TX_EQ_MASK 0x1FFFFFUL +#define RX_MASK 0xFFFFUL + +#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \ + QSFP_MAX_POWER_SHIFT) +#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT) +#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT) +#define RX_SMASK (RX_MASK << RX_SHIFT) + +#define TX_PRECUR_SHIFT 0 +#define TX_ATTN_SHIFT 4 +#define QSFP_TX_CDR_APPLY_SHIFT 9 +#define QSFP_TX_EQ_APPLY_SHIFT 10 +#define QSFP_TX_CDR_SHIFT 11 +#define QSFP_TX_EQ_SHIFT 12 +#define TX_POSTCUR_SHIFT 16 + +#define TX_PRECUR_MASK 0xFUL +#define TX_ATTN_MASK 0x1FUL +#define QSFP_TX_CDR_APPLY_MASK 0x1UL +#define QSFP_TX_EQ_APPLY_MASK 0x1UL +#define QSFP_TX_CDR_MASK 0x1UL +#define QSFP_TX_EQ_MASK 0xFUL +#define TX_POSTCUR_MASK 0x1FUL + +#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT) +#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT) +#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \ + QSFP_TX_CDR_APPLY_SHIFT) +#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \ + QSFP_TX_EQ_APPLY_SHIFT) +#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT) +#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT) +#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT) + +#define QSFP_RX_CDR_APPLY_SHIFT 0 +#define QSFP_RX_EMP_APPLY_SHIFT 1 +#define QSFP_RX_AMP_APPLY_SHIFT 2 +#define QSFP_RX_CDR_SHIFT 3 +#define QSFP_RX_EMP_SHIFT 4 +#define QSFP_RX_AMP_SHIFT 8 + +#define QSFP_RX_CDR_APPLY_MASK 0x1UL +#define QSFP_RX_EMP_APPLY_MASK 0x1UL +#define QSFP_RX_AMP_APPLY_MASK 0x1UL +#define QSFP_RX_CDR_MASK 0x1UL +#define QSFP_RX_EMP_MASK 0xFUL +#define QSFP_RX_AMP_MASK 0x3UL + +#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \ + QSFP_RX_CDR_APPLY_SHIFT) +#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \ + QSFP_RX_EMP_APPLY_SHIFT) +#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \ + QSFP_RX_AMP_APPLY_SHIFT) +#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT) +#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT) +#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT) + +#define BITMAP_VERSION 1 +#define BITMAP_VERSION_SHIFT 44 +#define BITMAP_VERSION_MASK 0xFUL +#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \ + BITMAP_VERSION_SHIFT) +#define CHECKSUM_SHIFT 48 +#define CHECKSUM_MASK 0xFFFFUL +#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT) + /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9fc75e7e8781..d752d6768a49 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp) static void flush_iowait(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; + seqlock_t *lock = priv->s_iowait.lock; - write_seqlock_irqsave(&dev->iowait_lock, flags); + if (!lock) + return; + write_seqlock_irqsave(lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(lock, flags); } static inline int opa_mtu_enum_to_int(int mtu) @@ -543,6 +546,7 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } @@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 83198a8a8797..809b26eb6d3c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, rvt_get_mr(ps->s_txreq->mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); @@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, bth2 = mask_psn(qp->s_ack_rdma_psn++); } else { /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); @@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; if (ps->s_txreq->mr) rvt_get_mr(ps->s_txreq->mr); @@ -335,7 +335,7 @@ normal: */ qp->s_ack_state = OP(SEND_ONLY); qp->s_flags &= ~RVT_S_ACK_PENDING; - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; if (qp->s_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) | @@ -351,7 +351,7 @@ normal: qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = ss; - qp->s_cur_size = len; + ps->s_txreq->ss = ss; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header( qp, ohdr, @@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct hfi1_ibport *ibp) { - struct ib_wc wc; - unsigned i; - lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a @@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -2295,7 +2262,7 @@ send_last: hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last); rvt_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) + if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -2410,8 +2377,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a1576aea4756..717ed4b159d3 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -239,16 +239,6 @@ bail: return ret; } -static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index) -{ - if (!index) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - - return cpu_to_be64(ppd->guid); - } - return ibp->guids[index - 1]; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = - grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ? - ibp->guids[grh->sgid_index - 1] : - cpu_to_be64(ppd_from_ibp(ibp)->guid); + grh->sgid_index < HFI1_GUIDS_PER_PORT ? + get_sguid(ibp, grh->sgid_index) : + get_sguid(ibp, HFI1_PORT_GUID_INDEX); hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ @@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth1; /* Construct the header. */ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; + extra_bytes = -ps->s_txreq->s_cur_size & 3; + nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, @@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 9cbe52d21077..1d81cac1fa6c 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde, sde->head_sn, tx->sn); sde->head_sn++; #endif - sdma_txclean(sde->dd, tx); + __sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); if (wait && iowait_sdma_dec(wait)) @@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) } /** - * sdma_txclean() - clean tx of mappings, descp *kmalloc's + * __sdma_txclean() - clean tx of mappings, descp *kmalloc's * @dd: hfi1_devdata for unmapping * @tx: tx request to clean * @@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) * The code can be called multiple times without issue. * */ -void sdma_txclean( +void __sdma_txclean( struct hfi1_devdata *dd, struct sdma_txreq *tx) { @@ -3065,7 +3065,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) tx->descp[i] = tx->descs[i]; return 0; enomem: - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOMEM; } @@ -3094,14 +3094,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } /* If coalesce buffer is allocated, copy data into it */ if (tx->coalesce_buf) { if (type == SDMA_MAP_NONE) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3109,7 +3109,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, kvaddr = kmap(page); kvaddr += offset; } else if (WARN_ON(!kvaddr)) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3139,7 +3139,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -3181,7 +3181,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } } diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 56257ea3598f..21f1e2834f37 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, int type, void *kvaddr, struct page *page, unsigned long offset, u16 len); int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); -void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); +void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); + +static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx) +{ + if (tx->num_desc) + __sdma_txclean(dd, tx); +} /* helpers used by public routines */ static inline void _sdma_close_tx(struct hfi1_devdata *dd, @@ -753,7 +759,7 @@ static inline int sdma_txadd_page( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5e6d1bac4914..b141a78ae38b 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; + ps->s_txreq->ss = &qp->s_sge; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); /* pbc */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 97ae24b6314c..c071955c0272 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; + ps->s_txreq->s_cur_size = wqe->length; + ps->s_txreq->ss = &qp->s_sge; qp->s_srate = ah_attr->static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 77697d690f3e..7d22f8ee98ef 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_MASK 0xff #define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_OM_LARGE 64 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) -/* Last packet in the request */ -#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 @@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->busycount = 0; INIT_LIST_HEAD(&tx->list); + /* + * For the last packet set the ACK request + * and disable header suppression. + */ if (req->seqnum == req->info.npkts - 1) - tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; + tx->flags |= (TXREQ_FLAGS_REQ_ACK | + TXREQ_FLAGS_REQ_DISABLE_SH); /* * Calculate the payload size - this is min of the fragment @@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } datalen = compute_data_length(req, tx); + + /* + * Disable header suppression for the payload <= 8DWS. + * If there is an uncorrectable error in the receive + * data FIFO when the received payload size is less than + * or equal to 8DWS then the RxDmaDataFifoRdUncErr is + * not reported.There is set RHF.EccErr if the header + * is not suppressed. + */ if (!datalen) { SDMA_DBG(req, "Request has data but pkt len is 0"); ret = -EFAULT; goto free_tx; + } else if (datalen <= 32) { + tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; } } @@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) LRH2PBC(lrhlen); tx->hdr.pbc[0] = cpu_to_le16(pbclen); } + ret = check_header_template(req, &tx->hdr, + lrhlen, datalen); + if (ret) + goto free_tx; ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, sizeof(tx->hdr) + datalen, @@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req, req->seqnum)); /* Set ACK request on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) hdr->bth[2] |= cpu_to_be32(1UL << 31); /* Set the new offset */ @@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req, /* Set KDETH.TID based on value for this TID */ KDETH_SET(hdr->kdeth.ver_tid_offset, TID, EXP_TID_GET(tidval, IDX)); - /* Clear KDETH.SH only on the last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + /* Clear KDETH.SH when DISABLE_SH flag is set */ + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of @@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* BTH.PSN and BTH.A */ val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); @@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | ((req->tidoffset / req->omfactor) & 0x7fff))); - /* KDETH.TIDCtrl, KDETH.TID */ + /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | - (EXP_TID_GET(tidval, IDX) & 0x3ff)); - /* Clear KDETH.SH on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { - val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) << - AHG_KDETH_INTR_SHIFT); - val &= cpu_to_le16(~(1U << 13)); - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + (EXP_TID_GET(tidval, IDX) & 0x3ff)); + + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { + val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } else { - AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val); + val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? + cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : + cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } + + AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4b7a16ceb362..95ed4d6da510 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -297,22 +297,6 @@ static inline int wss_exceeds_threshold(void) } /* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, - [IB_WR_SEND_WITH_INV] = IB_WC_SEND, - [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, - [IB_WR_REG_MR] = IB_WC_REG_MR -}; - -/* * Length of header by opcode, 0 --> not supported */ const u8 hdr_len_by_opcode[256] = { @@ -694,6 +678,7 @@ static void mem_timer(unsigned long data) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); rvt_get_qp(qp); } @@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev, */ static noinline int build_verbs_ulp_payload( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx) { + struct rvt_sge_state *ss = tx->ss; struct rvt_sge *sg_list = ss->sg_list; struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; @@ -835,7 +821,6 @@ bail_txadd: /* New API */ static int build_verbs_tx_desc( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, struct hfi1_ahg_info *ahg_info, @@ -879,9 +864,9 @@ static int build_verbs_tx_desc( goto bail_txadd; } - /* add the ulp payload - if any. ss can be NULL for acks */ - if (ss) - ret = build_verbs_ulp_payload(sde, ss, length, tx); + /* add the ulp payload - if any. tx->ss can be NULL for acks */ + if (tx->ss) + ret = build_verbs_ulp_payload(sde, length, tx); bail_txadd: return ret; } @@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + u32 len = ps->s_txreq->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; @@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); + ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp, qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ @@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, { struct hfi1_qp_priv *priv = qp->priv; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + struct rvt_sge_state *ss = ps->s_txreq->ss; + u32 len = ps->s_txreq->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; @@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, u8 op = get_opcode(h); if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && + tx->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) @@ -1483,15 +1468,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid) { struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - if (guid_index == 0) - *guid = cpu_to_be64(ppd->guid); - else if (guid_index < HFI1_GUIDS_PER_PORT) - *guid = ibp->guids[guid_index - 1]; - else + if (guid_index >= HFI1_GUIDS_PER_PORT) return -EINVAL; + *guid = get_sguid(ibp, guid_index); return 0; } @@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, dc8051_ver_min(ver)); } +static const char * const driver_cntr_names[] = { + /* must be element 0*/ + "DRIVER_KernIntr", + "DRIVER_ErrorIntr", + "DRIVER_Tx_Errs", + "DRIVER_Rcv_Errs", + "DRIVER_HW_Errs", + "DRIVER_NoPIOBufs", + "DRIVER_CtxtsOpen", + "DRIVER_RcvLen_Errs", + "DRIVER_EgrBufFull", + "DRIVER_EgrHdrFull" +}; + +static const char **dev_cntr_names; +static const char **port_cntr_names; +static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +static int num_dev_cntrs; +static int num_port_cntrs; +static int cntr_names_initialized; + +/* + * Convert a list of names separated by '\n' into an array of NULL terminated + * strings. Optionally some entries can be reserved in the array to hold extra + * external strings. + */ +static int init_cntr_names(const char *names_in, + const int names_len, + int num_extra_names, + int *num_cntrs, + const char ***cntr_names) +{ + char *names_out, *p, **q; + int i, n; + + n = 0; + for (i = 0; i < names_len; i++) + if (names_in[i] == '\n') + n++; + + names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, + GFP_KERNEL); + if (!names_out) { + *num_cntrs = 0; + *cntr_names = NULL; + return -ENOMEM; + } + + p = names_out + (n + num_extra_names) * sizeof(char *); + memcpy(p, names_in, names_len); + + q = (char **)names_out; + for (i = 0; i < n; i++) { + q[i] = p; + p = strchr(p, '\n'); + *p++ = '\0'; + } + + *num_cntrs = n; + *cntr_names = (const char **)names_out; + return 0; +} + +static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + int i, err; + + if (!cntr_names_initialized) { + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + + err = init_cntr_names(dd->cntrnames, + dd->cntrnameslen, + num_driver_cntrs, + &num_dev_cntrs, + &dev_cntr_names); + if (err) + return NULL; + + for (i = 0; i < num_driver_cntrs; i++) + dev_cntr_names[num_dev_cntrs + i] = + driver_cntr_names[i]; + + err = init_cntr_names(dd->portcntrnames, + dd->portcntrnameslen, + 0, + &num_port_cntrs, + &port_cntr_names); + if (err) { + kfree(dev_cntr_names); + dev_cntr_names = NULL; + return NULL; + } + cntr_names_initialized = 1; + } + + if (!port_num) + return rdma_alloc_hw_stats_struct( + dev_cntr_names, + num_dev_cntrs + num_driver_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + else + return rdma_alloc_hw_stats_struct( + port_cntr_names, + num_port_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static u64 hfi1_sps_ints(void) +{ + unsigned long flags; + struct hfi1_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&hfi1_devs_lock, flags); + list_for_each_entry(dd, &hfi1_dev_list, list) { + sps_ints += get_all_cpu_total(dd->int_counter); + } + spin_unlock_irqrestore(&hfi1_devs_lock, flags); + return sps_ints; +} + +static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) +{ + u64 *values; + int count; + + if (!port) { + u64 *stats = (u64 *)&hfi1_stats; + int i; + + hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); + values[num_dev_cntrs] = hfi1_sps_ints(); + for (i = 1; i < num_driver_cntrs; i++) + values[num_dev_cntrs + i] = stats[i]; + count = num_dev_cntrs + num_driver_cntrs; + } else { + struct hfi1_ibport *ibp = to_iport(ibdev, port); + + hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); + count = num_port_cntrs; + } + + memcpy(stats->value, values, count * sizeof(u64)); + return count; +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) struct hfi1_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; + struct hfi1_ibport *ibp = &ppd->ibport_data; unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; @@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); seqlock_init(&dev->iowait_lock); + seqlock_init(&dev->txwait_lock); INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); @@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) if (ret) goto err_verbs_txreq; + /* Use first-port GUID as node guid */ + ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); + /* * The system image GUID is supposed to be the same for all * HFIs in a single system but since there can be other * device types in the system, we can't be sure this is unique. */ if (!ib_hfi1_sys_image_guid) - ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid); + ib_hfi1_sys_image_guid = ibdev->node_guid; lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); ibdev->owner = THIS_MODULE; - ibdev->node_guid = cpu_to_be64(ppd->guid); ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; + ibdev->alloc_hw_stats = alloc_hw_stats; + ibdev->get_hw_stats = get_hw_stats; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + + kfree(dev_cntr_names); + kfree(port_cntr_names); + cntr_names_initialized = 0; } void hfi1_cnp_rcv(struct hfi1_packet *packet) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 1c3815d89eb7..e6b893010e6d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -73,7 +73,6 @@ struct hfi1_packet; #include "iowait.h" #define HFI1_MAX_RDMA_ATOMIC 16 -#define HFI1_GUIDS_PER_PORT 5 /* * Increment this value if any changes that break userspace ABI @@ -169,8 +168,6 @@ struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; struct rvt_ibport rvp; - __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - /* the first 16 entries are sl_to_vl for !OPA */ u8 sl_to_sc[32]; u8 sc_to_sl[32]; @@ -180,18 +177,19 @@ struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ /* QP numbers are shared by all IB ports */ - /* protect wait lists */ - seqlock_t iowait_lock; + /* protect txwait list */ + seqlock_t txwait_lock ____cacheline_aligned_in_smp; struct list_head txwait; /* list for wait verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ - struct list_head txreq_free; struct kmem_cache *verbs_txreq_cache; - struct timer_list mem_timer; + u64 n_txwait; + u64 n_kmem_wait; + /* protect iowait lists */ + seqlock_t iowait_lock ____cacheline_aligned_in_smp; u64 n_piowait; u64 n_piodrain; - u64 n_txwait; - u64 n_kmem_wait; + struct timer_list mem_timer; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 094ab829ec42..5d23172c470f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx) kmem_cache_free(dev->verbs_txreq_cache, tx); do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&dev->txwait_lock); if (!list_empty(&dev->txwait)) { struct iowait *wait; - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&dev->txwait_lock, flags); wait = list_first_entry(&dev->txwait, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&dev->txwait_lock, flags); hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&dev->txwait_lock, seq)); } struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, @@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - write_seqlock(&dev->iowait_lock); + write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); + priv->s_iowait.lock = &dev->txwait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } out: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&dev->txwait_lock); return tx; } diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 5660897593ba..76216f2ef35a 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -65,6 +65,7 @@ struct verbs_txreq { struct sdma_engine *sde; struct send_context *psc; u16 hdr_dwords; + u16 s_cur_size; }; struct hfi1_ibdev; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 24f79ee39fdf..0ac294db3b29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,7 +39,8 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); struct device *dev = &hr_dev->pdev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 863a17a2de40..605962f2828c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -61,9 +61,10 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) return ret; } -void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj) +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, + int rr) { - hns_roce_bitmap_free_range(bitmap, obj, 1); + hns_roce_bitmap_free_range(bitmap, obj, 1, rr); } int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, @@ -106,7 +107,8 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, - unsigned long obj, int cnt) + unsigned long obj, int cnt, + int rr) { int i; @@ -116,7 +118,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, for (i = 0; i < cnt; i++) clear_bit(obj + i, bitmap->table); - bitmap->last = min(bitmap->last, obj); + if (!rr) + bitmap->last = min(bitmap->last, obj); bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) & bitmap->mask; spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 2a0b6c05da5f..8c1f7a6f84d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -216,10 +216,10 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, goto out; /* - * It is timeout when wait_for_completion_timeout return 0 - * The return value is the time limit set in advance - * how many seconds showing - */ + * It is timeout when wait_for_completion_timeout return 0 + * The return value is the time limit set in advance + * how many seconds showing + */ if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index e3997d312c55..f5a9ee2fc53d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_CMD_H #define HNS_ROCE_MAILBOX_SIZE 4096 +#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000 enum { /* TPT commands */ @@ -57,17 +58,6 @@ enum { HNS_ROCE_CMD_QUERY_QP = 0x22, }; -enum { - HNS_ROCE_CMD_TIME_CLASS_A = 10000, - HNS_ROCE_CMD_TIME_CLASS_B = 10000, - HNS_ROCE_CMD_TIME_CLASS_C = 10000, -}; - -struct hns_roce_cmd_mailbox { - void *buf; - dma_addr_t dma; -}; - int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 297016103aa7..4af403e1348c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -57,6 +57,32 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +/* + * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon + * SOC, check if a is less than b. + * @a: hardware index value + * @b: hardware index value + * @bits: the number of bits of a and b, range: 0~31. + * + * Hardware index increases continuously till max value, and then restart + * from zero, again and again. Because the bits of reg field is often + * limited, the reg field can only hold the low bits of the hardware index + * in hisilicon SOC. + * In some scenes we need to compare two values(a,b) getted from two reg + * fields in this driver, for example: + * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has + * incresed from 0xffff to 0x1 and a is less than b. + * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a + * is bigger than b. + * + * Return true on a less than b, otherwise false. + */ +#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1) +#define roce_hw_index_shift(bits) (32 - (bits)) +#define roce_hw_index_cmp_lt(a, b, bits) \ + ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \ + roce_hw_index_shift(bits)) < 0) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -245,16 +271,26 @@ #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +#define ROCEE_SDB_PTR_CMP_BITS 28 + #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) +#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0 +#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \ + (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S) + +#define ROCEE_SDB_CNT_CMP_BITS 16 + +#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20 + +#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0 + /*************ROCEE_REG DEFINITION****************/ #define ROCEE_VENDOR_ID_REG 0x0 #define ROCEE_VENDOR_PART_ID_REG 0x4 -#define ROCEE_HW_VERSION_REG 0x8 - #define ROCEE_SYS_IMAGE_GUID_L_REG 0xC #define ROCEE_SYS_IMAGE_GUID_H_REG 0x10 @@ -318,7 +354,11 @@ #define ROCEE_SDB_ISSUE_PTR_REG 0x758 #define ROCEE_SDB_SEND_PTR_REG 0x75C +#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850 +#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0 #define ROCEE_SDB_INV_CNT_REG 0x9A4 +#define ROCEE_SDB_RETRY_CNT_REG 0x9AC +#define ROCEE_TSP_BP_ST_REG 0x9EC #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 097365932b09..589496c8fb9e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -35,7 +35,7 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #include "hns_roce_common.h" static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) @@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, unsigned long cq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, @@ -166,7 +166,7 @@ err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -176,11 +176,10 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); } -static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = &hr_dev->pdev->dev; @@ -204,7 +203,7 @@ static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, spin_unlock_irq(&cq_table->lock); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, @@ -349,6 +348,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, goto err_mtt; } + /* + * For the QP created by kernel space, tptr value should be initialized + * to zero; For the QP created by user space, it will cause synchronous + * problems if tptr is set to zero here, so we initialze it in user + * space. + */ + if (!context) + *hr_cq->tptr_addr = 0; + /* Get created cq handler and carry out event */ hr_cq->comp = hns_roce_ib_cq_comp; hr_cq->event = hns_roce_ib_cq_event; @@ -383,19 +391,25 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + int ret = 0; - hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + if (hr_dev->hw->destroy_cq) { + ret = hr_dev->hw->destroy_cq(ib_cq); + } else { + hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ib_cq->uobject) - ib_umem_release(hr_cq->umem); - else - /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + if (ib_cq->uobject) + ib_umem_release(hr_cq->umem); + else + /* Free the buff of stored cq */ + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, + ib_cq->cqe); - kfree(hr_cq); + kfree(hr_cq); + } - return 0; + return ret; } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 341731553a60..1a6cb5d7a0dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,6 +37,8 @@ #define DRV_NAME "hns_roce" +#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') + #define MAC_ADDR_OCTET_NUM 6 #define HNS_ROCE_MAX_MSG_LEN 0x80000000 @@ -54,6 +56,12 @@ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 +#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 +#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ + (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS) +#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 +#define HNS_ROCE_MIN_CQE_CNT 16 + #define HNS_ROCE_MAX_IRQ_NUM 34 #define HNS_ROCE_COMP_VEC_NUM 32 @@ -70,6 +78,9 @@ #define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 +#define BITMAP_NO_RR 0 +#define BITMAP_RR 1 + #define MR_TYPE_MR 0x00 #define MR_TYPE_DMA 0x03 @@ -196,9 +207,9 @@ struct hns_roce_bitmap { /* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ /* Every bit repesent to a partner free/used status in bitmap */ /* -* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 -* Bit = 1 represent to idle and available; bit = 0: not available -*/ + * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 + * Bit = 1 represent to idle and available; bit = 0: not available + */ struct hns_roce_buddy { /* Members point to every order level bitmap */ unsigned long **bits; @@ -296,7 +307,7 @@ struct hns_roce_cq { u32 cq_depth; u32 cons_index; void __iomem *cq_db_l; - void __iomem *tptr_addr; + u16 *tptr_addr; unsigned long cqn; u32 vector; atomic_t refcount; @@ -360,29 +371,34 @@ struct hns_roce_cmdq { struct mutex hcr_mutex; struct semaphore poll_sem; /* - * Event mode: cmd register mutex protection, - * ensure to not exceed max_cmds and user use limit region - */ + * Event mode: cmd register mutex protection, + * ensure to not exceed max_cmds and user use limit region + */ struct semaphore event_sem; int max_cmds; spinlock_t context_lock; int free_head; struct hns_roce_cmd_context *context; /* - * Result of get integer part - * which max_comds compute according a power of 2 - */ + * Result of get integer part + * which max_comds compute according a power of 2 + */ u16 token_mask; /* - * Process whether use event mode, init default non-zero - * After the event queue of cmd event ready, - * can switch into event mode - * close device, switch into poll mode(non event mode) - */ + * Process whether use event mode, init default non-zero + * After the event queue of cmd event ready, + * can switch into event mode + * close device, switch into poll mode(non event mode) + */ u8 use_events; u8 toggle; }; +struct hns_roce_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + struct hns_roce_dev; struct hns_roce_qp { @@ -424,8 +440,6 @@ struct hns_roce_ib_iboe { struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; struct notifier_block nb; struct notifier_block nb_inet; - /* 16 GID is shared by 6 port in v1 engine. */ - union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM]; u8 phy_port[HNS_ROCE_MAX_PORTS]; }; @@ -519,6 +533,8 @@ struct hns_roce_hw { struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); + int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); + int (*destroy_cq)(struct ib_cq *ibcq); void *priv; }; @@ -553,6 +569,8 @@ struct hns_roce_dev { int cmd_mod; int loop_idc; + dma_addr_t tptr_dma_addr; /*only for hw v1*/ + u32 tptr_size; /*only for hw v1*/ struct hns_roce_hw *hw; }; @@ -657,7 +675,8 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); -void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj); +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, + int rr); int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, u32 reserved_bot, u32 resetrved_top); void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap); @@ -665,9 +684,11 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, int align, unsigned long *obj); void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, - unsigned long obj, int cnt); + unsigned long obj, int cnt, + int rr); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah); @@ -681,6 +702,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int hns_roce_dereg_mr(struct ib_mr *ibmr); +int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); +unsigned long key_to_hw_index(u32 key); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, struct hns_roce_buf *buf); @@ -717,6 +742,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct ib_udata *udata); int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); +void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 21e21b03cfb5..50f864935a0e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -371,9 +371,9 @@ static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev, int i = 0; /** - * AEQ overflow ECC mult bit err CEQ overflow alarm - * must clear interrupt, mask irq, clear irq, cancel mask operation - */ + * AEQ overflow ECC mult bit err CEQ overflow alarm + * must clear interrupt, mask irq, clear irq, cancel mask operation + */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); if (roce_get_bit(aeshift_val, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 250d8f280390..c5104e0b2916 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -80,9 +80,9 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, --order; /* - * Alloc memory one time. If failed, don't alloc small block - * memory, directly return fail. - */ + * Alloc memory one time. If failed, don't alloc small block + * memory, directly return fail. + */ mem = &chunk->mem[chunk->npages]; buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order, &sg_dma_address(mem), gfp_mask); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 71232e5fabf6..b8111b0c8877 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -32,6 +32,7 @@ #include <linux/platform_device.h> #include <linux/acpi.h> +#include <linux/etherdevice.h> #include <rdma/ib_umem.h> #include "hns_roce_common.h" #include "hns_roce_device.h" @@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq = 0; u32 ind = 0; int ret = 0; + u8 *smac; + int loopback; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, UD_SEND_WQE_U32_8_DMAC_5_M, UD_SEND_WQE_U32_8_DMAC_5_S, ah->av.mac[5]); + + smac = (u8 *)hr_dev->dev_addr[qp->port]; + loopback = ether_addr_equal_unaligned(ah->av.mac, + smac) ? 1 : 0; + roce_set_bit(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S, + loopback); + roce_set_field(ud_sq_wqe->u32_8, UD_SEND_WQE_U32_8_OPERATION_TYPE_M, UD_SEND_WQE_U32_8_OPERATION_TYPE_S, @@ -284,6 +295,8 @@ out: roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M, + SQ_DOORBELL_U32_4_SL_S, qp->sl); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M, SQ_DOORBELL_U32_4_PORT_S, qp->phy_port); roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M, @@ -611,6 +624,213 @@ ext_sdb_buf_fail_out: return ret; } +static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev, + struct ib_pd *pd) +{ + struct device *dev = &hr_dev->pdev->dev; + struct ib_qp_init_attr init_attr; + struct ib_qp *qp; + + memset(&init_attr, 0, sizeof(struct ib_qp_init_attr)); + init_attr.qp_type = IB_QPT_RC; + init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM; + init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM; + + qp = hns_roce_create_qp(pd, &init_attr, NULL); + if (IS_ERR(qp)) { + dev_err(dev, "Create loop qp for mr free failed!"); + return NULL; + } + + return to_hr_qp(qp); +} + +static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_caps *caps = &hr_dev->caps; + struct device *dev = &hr_dev->pdev->dev; + struct ib_cq_init_attr cq_init_attr; + struct hns_roce_free_mr *free_mr; + struct ib_qp_attr attr = { 0 }; + struct hns_roce_v1_priv *priv; + struct hns_roce_qp *hr_qp; + struct ib_cq *cq; + struct ib_pd *pd; + u64 subnet_prefix; + int attr_mask = 0; + int i; + int ret; + u8 phy_port; + u8 sl; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + /* Reserved cq for loop qp */ + cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; + cq_init_attr.comp_vector = 0; + cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); + if (IS_ERR(cq)) { + dev_err(dev, "Create cq for reseved loop qp failed!"); + return -ENOMEM; + } + free_mr->mr_free_cq = to_hr_cq(cq); + free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev; + free_mr->mr_free_cq->ib_cq.uobject = NULL; + free_mr->mr_free_cq->ib_cq.comp_handler = NULL; + free_mr->mr_free_cq->ib_cq.event_handler = NULL; + free_mr->mr_free_cq->ib_cq.cq_context = NULL; + atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); + + pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); + if (IS_ERR(pd)) { + dev_err(dev, "Create pd for reseved loop qp failed!"); + ret = -ENOMEM; + goto alloc_pd_failed; + } + free_mr->mr_free_pd = to_hr_pd(pd); + free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; + free_mr->mr_free_pd->ibpd.uobject = NULL; + atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); + + attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; + attr.pkey_index = 0; + attr.min_rnr_timer = 0; + /* Disable read ability */ + attr.max_dest_rd_atomic = 0; + attr.max_rd_atomic = 0; + /* Use arbitrary values as rq_psn and sq_psn */ + attr.rq_psn = 0x0808; + attr.sq_psn = 0x0808; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.timeout = 0x12; + attr.path_mtu = IB_MTU_256; + attr.ah_attr.ah_flags = 1; + attr.ah_attr.static_rate = 3; + attr.ah_attr.grh.sgid_index = 0; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.flow_label = 0; + attr.ah_attr.grh.traffic_class = 0; + + subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); + if (IS_ERR(free_mr->mr_free_qp[i])) { + dev_err(dev, "Create loop qp failed!\n"); + goto create_lp_qp_failed; + } + hr_qp = free_mr->mr_free_qp[i]; + + sl = i / caps->num_ports; + + if (caps->num_ports == HNS_ROCE_MAX_PORTS) + phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : + (i % caps->num_ports); + else + phy_port = i % caps->num_ports; + + hr_qp->port = phy_port + 1; + hr_qp->phy_port = phy_port; + hr_qp->ibqp.qp_type = IB_QPT_RC; + hr_qp->ibqp.device = &hr_dev->ib_dev; + hr_qp->ibqp.uobject = NULL; + atomic_set(&hr_qp->ibqp.usecnt, 0); + hr_qp->ibqp.pd = pd; + hr_qp->ibqp.recv_cq = cq; + hr_qp->ibqp.send_cq = cq; + + attr.ah_attr.port_num = phy_port + 1; + attr.ah_attr.sl = sl; + attr.port_num = phy_port + 1; + + attr.dest_qp_num = hr_qp->qpn; + memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port], + MAC_ADDR_OCTET_NUM); + + memcpy(attr.ah_attr.grh.dgid.raw, + &subnet_prefix, sizeof(u64)); + memcpy(&attr.ah_attr.grh.dgid.raw[8], + hr_dev->dev_addr[phy_port], 3); + memcpy(&attr.ah_attr.grh.dgid.raw[13], + hr_dev->dev_addr[phy_port] + 3, 3); + attr.ah_attr.grh.dgid.raw[11] = 0xff; + attr.ah_attr.grh.dgid.raw[12] = 0xfe; + attr.ah_attr.grh.dgid.raw[8] ^= 2; + + attr_mask |= IB_QP_PORT; + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_RESET, IB_QPS_INIT); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_INIT, IB_QPS_RTR); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_RTR, IB_QPS_RTS); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + } + + return 0; + +create_lp_qp_failed: + for (i -= 1; i >= 0; i--) { + hr_qp = free_mr->mr_free_qp[i]; + if (hns_roce_v1_destroy_qp(&hr_qp->ibqp)) + dev_err(dev, "Destroy qp %d for mr free failed!\n", i); + } + + if (hns_roce_dealloc_pd(pd)) + dev_err(dev, "Destroy pd for create_lp_qp failed!\n"); + +alloc_pd_failed: + if (hns_roce_ib_destroy_cq(cq)) + dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); + + return -EINVAL; +} + +static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct hns_roce_qp *hr_qp; + int ret; + int i; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + hr_qp = free_mr->mr_free_qp[i]; + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); + if (ret) + dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", + i, ret); + } + + ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq); + if (ret) + dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); + + ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); + if (ret) + dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret); +} + static int hns_roce_db_init(struct hns_roce_dev *hr_dev) { struct device *dev = &hr_dev->pdev->dev; @@ -648,6 +868,223 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) return 0; } +void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) +{ + struct hns_roce_recreate_lp_qp_work *lp_qp_work; + struct hns_roce_dev *hr_dev; + + lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work, + work); + hr_dev = to_hr_dev(lp_qp_work->ib_dev); + + hns_roce_v1_release_lp_qp(hr_dev); + + if (hns_roce_v1_rsv_lp_qp(hr_dev)) + dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n"); + + if (lp_qp_work->comp_flag) + complete(lp_qp_work->comp); + + kfree(lp_qp_work); +} + +static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_recreate_lp_qp_work *lp_qp_work; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct completion comp; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), + GFP_KERNEL); + + INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn); + + lp_qp_work->ib_dev = &(hr_dev->ib_dev); + lp_qp_work->comp = ∁ + lp_qp_work->comp_flag = 1; + + init_completion(lp_qp_work->comp); + + queue_work(free_mr->free_mr_wq, &(lp_qp_work->work)); + + while (time_before_eq(jiffies, end)) { + if (try_wait_for_completion(&comp)) + return 0; + msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE); + } + + lp_qp_work->comp_flag = 0; + if (try_wait_for_completion(&comp)) + return 0; + + dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n"); + return -ETIMEDOUT; +} + +static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct device *dev = &hr_dev->pdev->dev; + struct ib_send_wr send_wr, *bad_wr; + int ret; + + memset(&send_wr, 0, sizeof(send_wr)); + send_wr.next = NULL; + send_wr.num_sge = 0; + send_wr.send_flags = 0; + send_wr.sg_list = NULL; + send_wr.wr_id = (unsigned long long)&send_wr; + send_wr.opcode = IB_WR_RDMA_WRITE; + + ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr); + if (ret) { + dev_err(dev, "Post write wqe for mr free failed(%d)!", ret); + return ret; + } + + return 0; +} + +static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) +{ + struct hns_roce_mr_free_work *mr_work; + struct ib_wc wc[HNS_ROCE_V1_RESV_QP]; + struct hns_roce_free_mr *free_mr; + struct hns_roce_cq *mr_free_cq; + struct hns_roce_v1_priv *priv; + struct hns_roce_dev *hr_dev; + struct hns_roce_mr *hr_mr; + struct hns_roce_qp *hr_qp; + struct device *dev; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; + int i; + int ret; + int ne; + + mr_work = container_of(work, struct hns_roce_mr_free_work, work); + hr_mr = (struct hns_roce_mr *)mr_work->mr; + hr_dev = to_hr_dev(mr_work->ib_dev); + dev = &hr_dev->pdev->dev; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + mr_free_cq = free_mr->mr_free_cq; + + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + hr_qp = free_mr->mr_free_qp[i]; + ret = hns_roce_v1_send_lp_wqe(hr_qp); + if (ret) { + dev_err(dev, + "Send wqe (qp:0x%lx) for mr free failed(%d)!\n", + hr_qp->qpn, ret); + goto free_work; + } + } + + ne = HNS_ROCE_V1_RESV_QP; + do { + ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); + if (ret < 0) { + dev_err(dev, + "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", + hr_qp->qpn, ret, hr_mr->key, ne); + goto free_work; + } + ne -= ret; + msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + } while (ne && time_before_eq(jiffies, end)); + + if (ne != 0) + dev_err(dev, + "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n", + hr_mr->key, ne); + +free_work: + if (mr_work->comp_flag) + complete(mr_work->comp); + kfree(mr_work); +} + +int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_mr_free_work *mr_work; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct completion comp; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; + unsigned long start = jiffies; + int npages; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + if (mr->enabled) { + if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) + & (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "HW2SW_MPT failed!\n"); + } + + mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); + if (!mr_work) { + ret = -ENOMEM; + goto free_mr; + } + + INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn); + + mr_work->ib_dev = &(hr_dev->ib_dev); + mr_work->comp = ∁ + mr_work->comp_flag = 1; + mr_work->mr = (void *)mr; + init_completion(mr_work->comp); + + queue_work(free_mr->free_mr_wq, &(mr_work->work)); + + while (time_before_eq(jiffies, end)) { + if (try_wait_for_completion(&comp)) + goto free_mr; + msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + } + + mr_work->comp_flag = 0; + if (try_wait_for_completion(&comp)) + goto free_mr; + + dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key); + ret = -ETIMEDOUT; + +free_mr: + dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n", + mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start)); + + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + dma_free_coherent(dev, npages * 8, mr->pbl_buf, + mr->pbl_dma_addr); + } + + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, + key_to_hw_index(mr->key), 0); + + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return ret; +} + static void hns_roce_db_free(struct hns_roce_dev *hr_dev) { struct device *dev = &hr_dev->pdev->dev; @@ -849,6 +1286,85 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); } +static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; + + /* + * This buffer will be used for CQ's tptr(tail pointer), also + * named ci(customer index). Every CQ will use 2 bytes to save + * cqe ci in hip06. Hardware will read this area to get new ci + * when the queue is almost full. + */ + tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, + &tptr_buf->map, GFP_KERNEL); + if (!tptr_buf->buf) + return -ENOMEM; + + hr_dev->tptr_dma_addr = tptr_buf->map; + hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE; + + return 0; +} + +static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; + + dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, + tptr_buf->buf, tptr_buf->map); +} + +static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); + if (!free_mr->free_mr_wq) { + dev_err(dev, "Create free mr workqueue failed!\n"); + return -ENOMEM; + } + + ret = hns_roce_v1_rsv_lp_qp(hr_dev); + if (ret) { + dev_err(dev, "Reserved loop qp failed(%d)!\n", ret); + flush_workqueue(free_mr->free_mr_wq); + destroy_workqueue(free_mr->free_mr_wq); + } + + return ret; +} + +static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + flush_workqueue(free_mr->free_mr_wq); + destroy_workqueue(free_mr->free_mr_wq); + + hns_roce_v1_release_lp_qp(hr_dev); +} + /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer @@ -898,6 +1414,38 @@ int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) return ret; } +static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_des_qp *des_qp; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + des_qp = &priv->des_qp; + + des_qp->requeue_flag = 1; + des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp"); + if (!des_qp->qp_wq) { + dev_err(dev, "Create destroy qp workqueue failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v1_priv *priv; + struct hns_roce_des_qp *des_qp; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + des_qp = &priv->des_qp; + + des_qp->requeue_flag = 0; + flush_workqueue(des_qp->qp_wq); + destroy_workqueue(des_qp->qp_wq); +} + void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; @@ -906,12 +1454,11 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG)); - hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG)) | ((u64)le32_to_cpu(roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; @@ -1001,18 +1548,44 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev) goto error_failed_raq_init; } - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); - ret = hns_roce_bt_init(hr_dev); if (ret) { dev_err(dev, "bt init failed!\n"); goto error_failed_bt_init; } + ret = hns_roce_tptr_init(hr_dev); + if (ret) { + dev_err(dev, "tptr init failed!\n"); + goto error_failed_tptr_init; + } + + ret = hns_roce_des_qp_init(hr_dev); + if (ret) { + dev_err(dev, "des qp init failed!\n"); + goto error_failed_des_qp_init; + } + + ret = hns_roce_free_mr_init(hr_dev); + if (ret) { + dev_err(dev, "free mr init failed!\n"); + goto error_failed_free_mr_init; + } + + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); + return 0; +error_failed_free_mr_init: + hns_roce_des_qp_free(hr_dev); + +error_failed_des_qp_init: + hns_roce_tptr_free(hr_dev); + +error_failed_tptr_init: + hns_roce_bt_free(hr_dev); + error_failed_bt_init: - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_raq_free(hr_dev); error_failed_raq_init: @@ -1022,8 +1595,11 @@ error_failed_raq_init: void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { - hns_roce_bt_free(hr_dev); hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); + hns_roce_free_mr_free(hr_dev); + hns_roce_des_qp_free(hr_dev); + hns_roce_tptr_free(hr_dev); + hns_roce_bt_free(hr_dev); hns_roce_raq_free(hr_dev); hns_roce_db_free(hr_dev); } @@ -1061,6 +1637,14 @@ void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) u32 *p; u32 val; + /* + * When mac changed, loopback may fail + * because of smac not equal to dmac. + * We Need to release and create reserved qp again. + */ + if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev)) + dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n"); + p = (u32 *)(&addr[0]); reg_smac_l = *p; roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG + @@ -1293,9 +1877,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, } /* - * Now backwards through the CQ, removing CQ entries - * that match our QP by overwriting them with next entries. - */ + * Now backwards through the CQ, removing CQ entries + * that match our QP by overwriting them with next entries. + */ while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe); if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, @@ -1317,9 +1901,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; /* - * Make sure update of buffer contents is done before - * updating consumer index. - */ + * Make sure update of buffer contents is done before + * updating consumer index. + */ wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); @@ -1339,14 +1923,21 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, dma_addr_t dma_handle, int nent, u32 vector) { struct hns_roce_cq_context *cq_context = NULL; - void __iomem *tptr_addr; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + dma_addr_t tptr_dma_addr; + int offset; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; cq_context = mb_buf; memset(cq_context, 0, sizeof(*cq_context)); - tptr_addr = 0; - hr_dev->priv_addr = tptr_addr; - hr_cq->tptr_addr = tptr_addr; + /* Get the tptr for this CQ. */ + offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE; + tptr_dma_addr = tptr_buf->map + offset; + hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset); /* Register cq_context members */ roce_set_field(cq_context->cqc_byte_4, @@ -1390,10 +1981,10 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, - (u64)tptr_addr >> 44); + tptr_dma_addr >> 44); cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12); + cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -1407,7 +1998,7 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_bit(cq_context->cqc_byte_32, CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S, 0); - /*The initial value of cq's ci is 0 */ + /* The initial value of cq's ci is 0 */ roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); @@ -1424,9 +2015,9 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; /* - * flags = 0; Notification Flag = 1, next - * flags = 1; Notification Flag = 0, solocited - */ + * flags = 0; Notification Flag = 1, next + * flags = 1; Notification Flag = 0, solocited + */ doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -1581,10 +2172,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, wq = &(*cur_qp)->sq; if ((*cur_qp)->sq_signal_bits) { /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4, CQE_BYTE_4_WQE_INDEX_M, CQE_BYTE_4_WQE_INDEX_S); @@ -1659,8 +2250,14 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) break; } - if (npolled) + if (npolled) { + *hr_cq->tptr_addr = hr_cq->cons_index & + ((hr_cq->cq_depth << 1) - 1); + + /* Memroy barrier */ + wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); + } spin_unlock_irqrestore(&hr_cq->lock, flags); @@ -1799,12 +2396,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP) return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, HNS_ROCE_CMD_2RST_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP) return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, HNS_ROCE_CMD_2ERR_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) @@ -1814,7 +2411,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, op[cur_state][new_state], - HNS_ROCE_CMD_TIME_CLASS_C); + HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; @@ -2000,11 +2597,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, } /* - *Reset to init - * Mandatory param: - * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS - * Optional param: NA - */ + * Reset to init + * Mandatory param: + * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS + * Optional param: NA + */ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -2172,24 +2769,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, hr_qp->sq_signal_bits); - for (port = 0; port < hr_dev->caps.num_ports; port++) { - smac = (u8 *)hr_dev->dev_addr[port]; - dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n", - smac[0], smac[1], smac[2], smac[3], smac[4], - smac[5]); - if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) && - (dmac[2] == smac[2]) && (dmac[3] == smac[3]) && - (dmac[4] == smac[4]) && (dmac[5] == smac[5])) { - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, - 1); - break; - } - } - - if (hr_dev->loop_idc == 0x1) + port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : + hr_qp->port; + smac = (u8 *)hr_dev->dev_addr[port]; + /* when dmac equals smac or loop_idc is 1, it should loopback */ + if (ether_addr_equal_unaligned(dmac, smac) || + hr_dev->loop_idc == 0x1) roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S, @@ -2509,7 +3096,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, /* Every status migrate must change state */ roce_set_field(context->qpc_bytes_144, QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, - QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state); + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); /* SW pass context to HW */ ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, @@ -2522,9 +3109,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, } /* - * Use rst2init to instead of init2init with drv, - * need to hw to flash RQ HEAD by DB again - */ + * Use rst2init to instead of init2init with drv, + * need to hw to flash RQ HEAD by DB again + */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { /* Memory barrier */ wmb(); @@ -2619,7 +3206,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, HNS_ROCE_CMD_QUERY_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); if (!ret) memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); else @@ -2630,8 +3217,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_sqp_context context; + u32 addr; + + mutex_lock(&hr_qp->mutex); + + if (hr_qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + addr = ROCEE_QP1C_CFG0_0_REG + + hr_qp->port * sizeof(struct hns_roce_sqp_context); + context.qp1c_bytes_4 = roce_read(hr_dev, addr); + context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); + context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); + context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); + context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); + context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); + context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); + context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); + context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); + context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + + hr_qp->state = roce_get_field(context.qp1c_bytes_4, + QP1C_BYTES_4_QP_STATE_M, + QP1C_BYTES_4_QP_STATE_S); + qp_attr->qp_state = hr_qp->state; + qp_attr->path_mtu = IB_MTU_256; + qp_attr->path_mig_state = IB_MIG_ARMED; + qp_attr->qkey = QKEY_VAL; + qp_attr->rq_psn = 0; + qp_attr->sq_psn = 0; + qp_attr->dest_qp_num = 1; + qp_attr->qp_access_flags = 6; + + qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20, + QP1C_BYTES_20_PKEY_IDX_M, + QP1C_BYTES_20_PKEY_IDX_S); + qp_attr->port_num = hr_qp->port + 1; + qp_attr->sq_draining = 0; + qp_attr->max_rd_atomic = 0; + qp_attr->max_dest_rd_atomic = 0; + qp_attr->min_rnr_timer = 0; + qp_attr->timeout = 0; + qp_attr->retry_cnt = 0; + qp_attr->rnr_retry = 0; + qp_attr->alt_timeout = 0; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; + qp_attr->cap.max_inline_data = 0; + qp_init_attr->cap = qp_attr->cap; + qp_init_attr->create_flags = 0; + + mutex_unlock(&hr_qp->mutex); + + return 0; +} + +static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -2725,9 +3382,7 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12, QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); - qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1; + qp_attr->port_num = hr_qp->port + 1; qp_attr->sq_draining = 0; qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156, QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, @@ -2767,134 +3422,397 @@ out: return ret; } -static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - int is_user) +int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + return hr_qp->doorbell_qpn <= 1 ? + hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) : + hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); +} + +static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + u32 sdb_issue_ptr, + u32 *sdb_inv_cnt, + u32 *wait_stage) { - u32 sdbinvcnt; - unsigned long end = 0; - u32 sdbinvcnt_val; - u32 sdbsendptr_val; - u32 sdbisusepr_val; - struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = &hr_dev->pdev->dev; + u32 sdb_retry_cnt, old_retry; + u32 sdb_send_ptr, old_send; + u32 success_flags = 0; + u32 cur_cnt, old_cnt; + unsigned long end; + u32 send_ptr; + u32 inv_cnt; + u32 tsp_st; + + if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || + *wait_stage < HNS_ROCE_V1_DB_STAGE1) { + dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n", + hr_qp->qpn, *wait_stage); + return -EINVAL; + } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) { - if (hr_qp->state != IB_QPS_RESET) { - /* - * Set qp to ERR, - * waiting for hw complete processing all dbs - */ - if (hns_roce_v1_qp_modify(hr_dev, NULL, - to_hns_roce_state( - (enum ib_qp_state)hr_qp->state), - HNS_ROCE_QP_STATE_ERR, NULL, - hr_qp)) - dev_err(dev, "modify QP %06lx to ERR failed.\n", - hr_qp->qpn); - - /* Record issued doorbell */ - sdbisusepr_val = roce_read(hr_dev, - ROCEE_SDB_ISSUE_PTR_REG); - /* - * Query db process status, - * until hw process completely - */ - end = msecs_to_jiffies( - HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies; - do { - sdbsendptr_val = roce_read(hr_dev, + /* Calculate the total timeout for the entire verification process */ + end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies; + + if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) { + /* Query db process status, until hw process completely */ + sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); + while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr, + ROCEE_SDB_PTR_CMP_BITS)) { + if (!time_before(jiffies, end)) { + dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n", + hr_qp->qpn, sdb_issue_ptr, + sdb_send_ptr); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - if (!time_before(jiffies, end)) { - dev_err(dev, "destroy qp(0x%lx) timeout!!!", - hr_qp->qpn); - break; - } - } while ((short)(roce_get_field(sdbsendptr_val, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) - - roce_get_field(sdbisusepr_val, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) - ) < 0); + } - /* Get list pointer */ - sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + if (roce_get_field(sdb_issue_ptr, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == + roce_get_field(sdb_send_ptr, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { + old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); + old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - /* Query db's list status, until hw reversal */ do { - sdbinvcnt_val = roce_read(hr_dev, - ROCEE_SDB_INV_CNT_REG); + tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); + if (roce_get_bit(tsp_st, + ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { + *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; + return 0; + } + if (!time_before(jiffies, end)) { - dev_err(dev, "destroy qp(0x%lx) timeout!!!", - hr_qp->qpn); - dev_err(dev, "SdbInvCnt = 0x%x\n", - sdbinvcnt_val); - break; + dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" + "issue 0x%x send 0x%x.\n", + hr_qp->qpn, sdb_issue_ptr, + sdb_send_ptr); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + + sdb_send_ptr = roce_read(hr_dev, + ROCEE_SDB_SEND_PTR_REG); + sdb_retry_cnt = roce_read(hr_dev, + ROCEE_SDB_RETRY_CNT_REG); + cur_cnt = roce_get_field(sdb_send_ptr, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (!roce_get_bit(tsp_st, + ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { + old_cnt = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(old_retry, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + success_flags = 1; + } else { + old_cnt = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + success_flags = 1; + else { + send_ptr = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + roce_set_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, + send_ptr); + } } - } while ((short)(roce_get_field(sdbinvcnt_val, - ROCEE_SDB_INV_CNT_SDB_INV_CNT_M, - ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) - - (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0); - - /* Modify qp to reset before destroying qp */ - if (hns_roce_v1_qp_modify(hr_dev, NULL, - to_hns_roce_state( - (enum ib_qp_state)hr_qp->state), - HNS_ROCE_QP_STATE_RST, NULL, hr_qp)) - dev_err(dev, "modify QP %06lx to RESET failed.\n", - hr_qp->qpn); + } while (!success_flags); } + + *wait_stage = HNS_ROCE_V1_DB_STAGE2; + + /* Get list pointer */ + *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n", + hr_qp->qpn, *sdb_inv_cnt); + } + + if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) { + /* Query db's list status, until hw reversal */ + inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + while (roce_hw_index_cmp_lt(inv_cnt, + *sdb_inv_cnt + SDB_INV_CNT_OFFSET, + ROCEE_SDB_CNT_CMP_BITS)) { + if (!time_before(jiffies, end)) { + dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n", + hr_qp->qpn, inv_cnt); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + } + + *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; + } + + return 0; +} + +static int check_qp_reset_state(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_qp_work *qp_work_entry, + int *is_timeout) +{ + struct device *dev = &hr_dev->pdev->dev; + u32 sdb_issue_ptr; + int ret; + + if (hr_qp->state != IB_QPS_RESET) { + /* Set qp to ERR, waiting for hw complete processing all dbs */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_ERR); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n", + hr_qp->qpn); + return ret; + } + + /* Record issued doorbell */ + sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG); + qp_work_entry->sdb_issue_ptr = sdb_issue_ptr; + qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1; + + /* Query db process status, until hw process completely */ + ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr, + &qp_work_entry->sdb_inv_cnt, + &qp_work_entry->db_wait_stage); + if (ret) { + dev_err(dev, "Check QP(0x%lx) db process status failed!\n", + hr_qp->qpn); + return ret; + } + + if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) { + qp_work_entry->sche_cnt = 0; + *is_timeout = 1; + return 0; + } + + /* Modify qp to reset before destroying qp */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_RESET); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", + hr_qp->qpn); + return ret; + } + } + + return 0; +} + +static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) +{ + struct hns_roce_qp_work *qp_work_entry; + struct hns_roce_v1_priv *priv; + struct hns_roce_dev *hr_dev; + struct hns_roce_qp *hr_qp; + struct device *dev; + int ret; + + qp_work_entry = container_of(work, struct hns_roce_qp_work, work); + hr_dev = to_hr_dev(qp_work_entry->ib_dev); + dev = &hr_dev->pdev->dev; + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + hr_qp = qp_work_entry->qp; + + dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn); + + qp_work_entry->sche_cnt++; + + /* Query db process status, until hw process completely */ + ret = check_qp_db_process_status(hr_dev, hr_qp, + qp_work_entry->sdb_issue_ptr, + &qp_work_entry->sdb_inv_cnt, + &qp_work_entry->db_wait_stage); + if (ret) { + dev_err(dev, "Check QP(0x%lx) db process status failed!\n", + hr_qp->qpn); + return; + } + + if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK && + priv->des_qp.requeue_flag) { + queue_work(priv->des_qp.qp_wq, work); + return; + } + + /* Modify qp to reset before destroying qp */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_RESET); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn); + return; + } + + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); + + if (hr_qp->ibqp.qp_type == IB_QPT_RC) { + /* RC QP, release QPN */ + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + kfree(hr_qp); + } else + kfree(hr_to_hr_sqp(hr_qp)); + + kfree(qp_work_entry); + + dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn); +} + +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_work qp_work_entry; + struct hns_roce_qp_work *qp_work; + struct hns_roce_v1_priv *priv; + struct hns_roce_cq *send_cq, *recv_cq; + int is_user = !!ibqp->pd->uobject; + int is_timeout = 0; + int ret; + + ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout); + if (ret) { + dev_err(dev, "QP reset state check failed(%d)!\n", ret); + return ret; } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); } - - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_unlock_cqs(send_cq, recv_cq); - hns_roce_qp_free(hr_dev, hr_qp); + if (!is_timeout) { + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); - /* Not special_QP, free their QPN */ - if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || - (hr_qp->ibqp.qp_type == IB_QPT_UC) || - (hr_qp->ibqp.qp_type == IB_QPT_UD)) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + /* RC QP, release QPN */ + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + } hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) { + if (is_user) ib_umem_release(hr_qp->umem); - } else { + else { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } + + if (!is_timeout) { + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + kfree(hr_qp); + else + kfree(hr_to_hr_sqp(hr_qp)); + } else { + qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL); + if (!qp_work) + return -ENOMEM; + + INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn); + qp_work->ib_dev = &hr_dev->ib_dev; + qp_work->qp = hr_qp; + qp_work->db_wait_stage = qp_work_entry.db_wait_stage; + qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr; + qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; + qp_work->sche_cnt = qp_work_entry.sche_cnt; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + queue_work(priv->des_qp.qp_wq, &qp_work->work); + dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); + } + + return 0; } -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + struct device *dev = &hr_dev->pdev->dev; + u32 cqe_cnt_ori; + u32 cqe_cnt_cur; + u32 cq_buf_size; + int wait_time = 0; + int ret = 0; - hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + hns_roce_free_cq(hr_dev, hr_cq); - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + /* + * Before freeing cq buffer, we need to ensure that the outstanding CQE + * have been written by checking the CQE counter. + */ + cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); + while (1) { + if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) & + HNS_ROCE_CQE_WCMD_EMPTY_BIT) + break; - return 0; + cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); + if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT) + break; + + msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS); + if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) { + dev_warn(dev, "Destroy cq 0x%lx timeout!\n", + hr_cq->cqn); + ret = -ETIMEDOUT; + break; + } + wait_time++; + } + + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + + if (ibcq->uobject) + ib_umem_release(hr_cq->umem); + else { + /* Free the buff of stored cq */ + cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; + hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + } + + kfree(hr_cq); + + return ret; } struct hns_roce_v1_priv hr_v1_priv; @@ -2917,5 +3835,7 @@ struct hns_roce_hw hns_roce_hw_v1 = { .post_recv = hns_roce_v1_post_recv, .req_notify_cq = hns_roce_v1_req_notify_cq, .poll_cq = hns_roce_v1_poll_cq, + .dereg_mr = hns_roce_v1_dereg_mr, + .destroy_cq = hns_roce_v1_destroy_cq, .priv = &hr_v1_priv, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 539b0a3b92b0..b213b5e6fef1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -58,6 +58,7 @@ #define HNS_ROCE_V1_PHY_UAR_NUM 8 #define HNS_ROCE_V1_GID_NUM 16 +#define HNS_ROCE_V1_RESV_QP 8 #define HNS_ROCE_V1_NUM_COMP_EQE 0x8000 #define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400 @@ -102,8 +103,22 @@ #define HNS_ROCE_V1_EXT_ODB_ALFUL \ (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_DB_WAIT_OK 0 +#define HNS_ROCE_V1_DB_STAGE1 1 +#define HNS_ROCE_V1_DB_STAGE2 2 +#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000 +#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20 +#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 +#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 +#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 +#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20 + #define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17) +#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2 +#define HNS_ROCE_V1_TPTR_BUF_SIZE \ + (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM) + #define HNS_ROCE_ODB_POLL_MODE 0 #define HNS_ROCE_SDB_NORMAL_MODE 0 @@ -140,6 +155,7 @@ #define SQ_PSN_SHIFT 8 #define QKEY_VAL 0x80010000 #define SDB_INV_CNT_OFFSET 8 +#define SDB_ST_CMP_VAL 8 struct hns_roce_cq_context { u32 cqc_byte_4; @@ -436,6 +452,8 @@ struct hns_roce_ud_send_wqe { #define UD_SEND_WQE_U32_8_DMAC_5_M \ (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S) +#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22 + #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \ (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S) @@ -480,13 +498,17 @@ struct hns_roce_sqp_context { u32 qp1c_bytes_12; u32 qp1c_bytes_16; u32 qp1c_bytes_20; - u32 qp1c_bytes_28; u32 cur_rq_wqe_ba_l; + u32 qp1c_bytes_28; u32 qp1c_bytes_32; u32 cur_sq_wqe_ba_l; u32 qp1c_bytes_40; }; +#define QP1C_BYTES_4_QP_STATE_S 0 +#define QP1C_BYTES_4_QP_STATE_M \ + (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S) + #define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8 #define QP1C_BYTES_4_SQ_WQE_SHIFT_M \ (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S) @@ -952,6 +974,10 @@ struct hns_roce_sq_db { #define SQ_DOORBELL_U32_4_SQ_HEAD_M \ (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S) +#define SQ_DOORBELL_U32_4_SL_S 16 +#define SQ_DOORBELL_U32_4_SL_M \ + (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S) + #define SQ_DOORBELL_U32_4_PORT_S 18 #define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S) @@ -979,12 +1005,58 @@ struct hns_roce_bt_table { struct hns_roce_buf_list cqc_buf; }; +struct hns_roce_tptr_table { + struct hns_roce_buf_list tptr_buf; +}; + +struct hns_roce_qp_work { + struct work_struct work; + struct ib_device *ib_dev; + struct hns_roce_qp *qp; + u32 db_wait_stage; + u32 sdb_issue_ptr; + u32 sdb_inv_cnt; + u32 sche_cnt; +}; + +struct hns_roce_des_qp { + struct workqueue_struct *qp_wq; + int requeue_flag; +}; + +struct hns_roce_mr_free_work { + struct work_struct work; + struct ib_device *ib_dev; + struct completion *comp; + int comp_flag; + void *mr; +}; + +struct hns_roce_recreate_lp_qp_work { + struct work_struct work; + struct ib_device *ib_dev; + struct completion *comp; + int comp_flag; +}; + +struct hns_roce_free_mr { + struct workqueue_struct *free_mr_wq; + struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP]; + struct hns_roce_cq *mr_free_cq; + struct hns_roce_pd *mr_free_pd; +}; + struct hns_roce_v1_priv { struct hns_roce_db_table db_table; struct hns_roce_raq_table raq_table; struct hns_roce_bt_table bt_table; + struct hns_roce_tptr_table tptr_table; + struct hns_roce_des_qp des_qp; + struct hns_roce_free_mr free_mr; }; int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); +int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 764e35a54457..4953d9cb83a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -35,52 +35,13 @@ #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "hns_roce_common.h" #include "hns_roce_device.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #include "hns_roce_hem.h" /** - * hns_roce_addrconf_ifid_eui48 - Get default gid. - * @eui: eui. - * @vlan_id: gid - * @dev: net device - * Description: - * MAC convert to GID - * gid[0..7] = fe80 0000 0000 0000 - * gid[8] = mac[0] ^ 2 - * gid[9] = mac[1] - * gid[10] = mac[2] - * gid[11] = ff (VLAN ID high byte (4 MS bits)) - * gid[12] = fe (VLAN ID low byte) - * gid[13] = mac[3] - * gid[14] = mac[4] - * gid[15] = mac[5] - */ -static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, - struct net_device *dev) -{ - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - if (vlan_id < 0x1000) { - eui[3] = vlan_id >> 8; - eui[4] = vlan_id & 0xff; - } else { - eui[3] = 0xff; - eui[4] = 0xfe; - } - eui[0] ^= 2; -} - -static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid) -{ - memset(gid, 0, sizeof(*gid)); - gid->raw[0] = 0xFE; - gid->raw[1] = 0x80; - hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); -} - -/** * hns_get_gid_index - Get gid index. * @hr_dev: pointer to structure hns_roce_dev. * @port: port, value range: 0 ~ MAX @@ -96,30 +57,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) return gid_index * hr_dev->caps.num_ports + port; } -static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid) -{ - struct device *dev = &hr_dev->pdev->dev; - u8 gid_idx = 0; - - if (gid_index >= hr_dev->caps.gid_table_len[port]) { - dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n", - gid_index, port, hr_dev->caps.gid_table_len[port] - 1); - return -EINVAL; - } - - gid_idx = hns_get_gid_index(hr_dev, port, gid_index); - - if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid))) - return -EINVAL; - - memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid)); - - hr_dev->hw->set_gid(hr_dev, port, gid_index, gid); - - return 0; -} - static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -135,27 +72,44 @@ static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu) +static int hns_roce_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context) { - u8 phy_port = hr_dev->iboe.phy_port[port]; - enum ib_mtu tmp; + struct hns_roce_dev *hr_dev = to_hr_dev(device); + u8 port = port_num - 1; + unsigned long flags; + + if (port >= hr_dev->caps.num_ports) + return -EINVAL; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); - tmp = iboe_get_mtu(mtu); - if (!tmp) - tmp = IB_MTU_256; + hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid); - hr_dev->hw->set_mtu(hr_dev, phy_port, tmp); + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; } -static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port) +static int hns_roce_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, void **context) { - struct ib_event event; + struct hns_roce_dev *hr_dev = to_hr_dev(device); + union ib_gid zgid = { {0} }; + u8 port = port_num - 1; + unsigned long flags; + + if (port >= hr_dev->caps.num_ports) + return -EINVAL; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); - /* Refresh gid in ib_cache */ - event.device = &hr_dev->ib_dev; - event.element.port_num = port + 1; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + hr_dev->hw->set_gid(hr_dev, port, index, &zgid); + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; } static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, @@ -163,9 +117,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, { struct device *dev = &hr_dev->pdev->dev; struct net_device *netdev; - unsigned long flags; - union ib_gid gid; - int ret = 0; netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { @@ -173,7 +124,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, return -ENODEV; } - spin_lock_irqsave(&hr_dev->iboe.lock, flags); + spin_lock_bh(&hr_dev->iboe.lock); switch (event) { case NETDEV_UP: @@ -181,23 +132,19 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, case NETDEV_REGISTER: case NETDEV_CHANGEADDR: hns_roce_set_mac(hr_dev, port, netdev->dev_addr); - hns_roce_make_default_gid(netdev, &gid); - ret = hns_roce_set_gid(hr_dev, port, 0, &gid); - if (!ret) - hns_roce_update_gids(hr_dev, port); break; case NETDEV_DOWN: /* - * In v1 engine, only support all ports closed together. - */ + * In v1 engine, only support all ports closed together. + */ break; default: dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event)); break; } - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return ret; + spin_unlock_bh(&hr_dev->iboe.lock); + return 0; } static int hns_roce_netdev_event(struct notifier_block *self, @@ -224,118 +171,17 @@ static int hns_roce_netdev_event(struct notifier_block *self, return NOTIFY_DONE; } -static void hns_roce_addr_event(int event, struct net_device *event_netdev, - struct hns_roce_dev *hr_dev, union ib_gid *gid) +static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) { - struct hns_roce_ib_iboe *iboe = NULL; - int gid_table_len = 0; - unsigned long flags; - union ib_gid zgid; - u8 gid_idx = 0; - u8 port = 0; - int i = 0; - int free; - struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? - rdma_vlan_dev_real_dev(event_netdev) : - event_netdev; - - if (event != NETDEV_UP && event != NETDEV_DOWN) - return; - - iboe = &hr_dev->iboe; - while (port < hr_dev->caps.num_ports) { - if (real_dev == iboe->netdevs[port]) - break; - port++; - } - - if (port >= hr_dev->caps.num_ports) { - dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n"); - return; - } - - memset(zgid.raw, 0, sizeof(zgid.raw)); - free = -1; - gid_table_len = hr_dev->caps.gid_table_len[port]; - - spin_lock_irqsave(&hr_dev->iboe.lock, flags); - - for (i = 0; i < gid_table_len; i++) { - gid_idx = hns_get_gid_index(hr_dev, port, i); - if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw, - sizeof(gid->raw))) - break; - if (free < 0 && !memcmp(zgid.raw, - iboe->gid_table[gid_idx].raw, sizeof(zgid.raw))) - free = i; - } - - if (i >= gid_table_len) { - if (free < 0) { - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_dbg(&hr_dev->pdev->dev, - "gid_index overflow, port(%d)\n", port); - return; - } - if (!hns_roce_set_gid(hr_dev, port, free, gid)) - hns_roce_update_gids(hr_dev, port); - } else if (event == NETDEV_DOWN) { - if (!hns_roce_set_gid(hr_dev, port, i, &zgid)) - hns_roce_update_gids(hr_dev, port); - } - - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); -} - -static int hns_roce_inet_event(struct notifier_block *self, unsigned long event, - void *ptr) -{ - struct in_ifaddr *ifa = ptr; - struct hns_roce_dev *hr_dev; - struct net_device *dev = ifa->ifa_dev->dev; - union ib_gid gid; - - ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - - hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet); - - hns_roce_addr_event(event, dev, hr_dev, &gid); - - return NOTIFY_DONE; -} - -static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev) -{ - struct in_ifaddr *ifa_list = NULL; - union ib_gid gid = {{0} }; - u32 ipaddr = 0; - int index = 0; - int ret = 0; - u8 i = 0; + u8 i; for (i = 0; i < hr_dev->caps.num_ports; i++) { - hns_roce_set_mtu(hr_dev, i, - ib_mtu_enum_to_int(hr_dev->caps.max_mtu)); + hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], + hr_dev->caps.max_mtu); hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); - - if (hr_dev->iboe.netdevs[i]->ip_ptr) { - ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list; - index = 1; - while (ifa_list) { - ipaddr = ifa_list->ifa_address; - ipv6_addr_set_v4mapped(ipaddr, - (struct in6_addr *)&gid); - ret = hns_roce_set_gid(hr_dev, i, index, &gid); - if (ret) - break; - index++; - ifa_list = ifa_list->ifa_next; - } - hns_roce_update_gids(hr_dev, i); - } } - return ret; + return 0; } static int hns_roce_query_device(struct ib_device *ib_dev, @@ -444,31 +290,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index, union ib_gid *gid) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = &hr_dev->pdev->dev; - u8 gid_idx = 0; - u8 port; - - if (port_num < 1 || port_num > hr_dev->caps.num_ports || - index >= hr_dev->caps.gid_table_len[port_num - 1]) { - dev_err(dev, - "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n", - port_num, index, hr_dev->caps.num_ports, - hr_dev->caps.gid_table_len[port_num - 1] - 1); - return -EINVAL; - } - - port = port_num - 1; - gid_idx = hns_get_gid_index(hr_dev, port, index); - if (gid_idx >= HNS_ROCE_MAX_GID_NUM) { - dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n", - port_num, index, HNS_ROCE_MAX_GID_NUM); - return -EINVAL; - } - - memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw, - HNS_ROCE_GID_SIZE); - return 0; } @@ -549,6 +370,8 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) static int hns_roce_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { + struct hns_roce_dev *hr_dev = to_hr_dev(context->device); + if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) return -EINVAL; @@ -558,10 +381,15 @@ static int hns_roce_mmap(struct ib_ucontext *context, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - - } else { + } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + /* vm_pgoff: 1 -- TPTR */ + if (io_remap_pfn_range(vma, vma->vm_start, + hr_dev->tptr_dma_addr >> PAGE_SHIFT, + hr_dev->tptr_size, + vma->vm_page_prot)) + return -EAGAIN; + } else return -EINVAL; - } return 0; } @@ -605,7 +433,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) spin_lock_init(&iboe->lock); ib_dev = &hr_dev->ib_dev; - strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX); + strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX); ib_dev->owner = THIS_MODULE; ib_dev->node_type = RDMA_NODE_IB_CA; @@ -639,6 +467,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->get_link_layer = hns_roce_get_link_layer; ib_dev->get_netdev = hns_roce_get_netdev; ib_dev->query_gid = hns_roce_query_gid; + ib_dev->add_gid = hns_roce_add_gid; + ib_dev->del_gid = hns_roce_del_gid; ib_dev->query_pkey = hns_roce_query_pkey; ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; @@ -681,32 +511,22 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) return ret; } - ret = hns_roce_setup_mtu_gids(hr_dev); + ret = hns_roce_setup_mtu_mac(hr_dev); if (ret) { - dev_err(dev, "roce_setup_mtu_gids failed!\n"); - goto error_failed_setup_mtu_gids; + dev_err(dev, "setup_mtu_mac failed!\n"); + goto error_failed_setup_mtu_mac; } iboe->nb.notifier_call = hns_roce_netdev_event; ret = register_netdevice_notifier(&iboe->nb); if (ret) { dev_err(dev, "register_netdevice_notifier failed!\n"); - goto error_failed_setup_mtu_gids; - } - - iboe->nb_inet.notifier_call = hns_roce_inet_event; - ret = register_inetaddr_notifier(&iboe->nb_inet); - if (ret) { - dev_err(dev, "register inet addr notifier failed!\n"); - goto error_failed_register_inetaddr_notifier; + goto error_failed_setup_mtu_mac; } return 0; -error_failed_register_inetaddr_notifier: - unregister_netdevice_notifier(&iboe->nb); - -error_failed_setup_mtu_gids: +error_failed_setup_mtu_mac: ib_unregister_device(ib_dev); return ret; @@ -940,10 +760,10 @@ err_unmap_mtt: } /** -* hns_roce_setup_hca - setup host channel adapter -* @hr_dev: pointer to hns roce device -* Return : int -*/ + * hns_roce_setup_hca - setup host channel adapter + * @hr_dev: pointer to hns roce device + * Return : int + */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { int ret; @@ -1008,11 +828,11 @@ err_uar_table_free: } /** -* hns_roce_probe - RoCE driver entrance -* @pdev: pointer to platform device -* Return : int -* -*/ + * hns_roce_probe - RoCE driver entrance + * @pdev: pointer to platform device + * Return : int + * + */ static int hns_roce_probe(struct platform_device *pdev) { int ret; @@ -1023,9 +843,6 @@ static int hns_roce_probe(struct platform_device *pdev) if (!hr_dev) return -ENOMEM; - memset((u8 *)hr_dev + sizeof(struct ib_device), 0, - sizeof(struct hns_roce_dev) - sizeof(struct ib_device)); - hr_dev->pdev = pdev; platform_set_drvdata(pdev, hr_dev); @@ -1125,9 +942,9 @@ error_failed_get_cfg: } /** -* hns_roce_remove - remove RoCE device -* @pdev: pointer to platform device -*/ + * hns_roce_remove - remove RoCE device + * @pdev: pointer to platform device + */ static int hns_roce_remove(struct platform_device *pdev) { struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index fb87883ead34..4139abee3b54 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -42,7 +42,7 @@ static u32 hw_index_to_key(unsigned long ind) return (u32)(ind >> 24) | (ind << 8); } -static unsigned long key_to_hw_index(u32 key) +unsigned long key_to_hw_index(u32 key) { return (key << 24) | (key >> 8); } @@ -53,16 +53,16 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, HNS_ROCE_CMD_SW2HW_MPT, - HNS_ROCE_CMD_TIME_CLASS_B); + HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, +int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, - HNS_ROCE_CMD_TIME_CLASS_B); + HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, @@ -137,11 +137,13 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL); - if (!buddy->bits[i]) - goto err_out_free; - - bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); + buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | + __GFP_NOWARN); + if (!buddy->bits[i]) { + buddy->bits[i] = vzalloc(s * sizeof(long)); + if (!buddy->bits[i]) + goto err_out_free; + } } set_bit(0, buddy->bits[buddy->max_order]); @@ -151,7 +153,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) err_out_free: for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -164,7 +166,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); @@ -287,7 +289,7 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, } hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, - key_to_hw_index(mr->key)); + key_to_hw_index(mr->key), BITMAP_NO_RR); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, @@ -605,13 +607,20 @@ err_free: int hns_roce_dereg_mr(struct ib_mr *ibmr) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); + int ret = 0; - hns_roce_mr_free(to_hr_dev(ibmr->device), mr); - if (mr->umem) - ib_umem_release(mr->umem); + if (hr_dev->hw->dereg_mr) { + ret = hr_dev->hw->dereg_mr(hr_dev, mr); + } else { + hns_roce_mr_free(hr_dev, mr); - kfree(mr); + if (mr->umem) + ib_umem_release(mr->umem); - return 0; + kfree(mr); + } + + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 05db7d59812a..a64500fa1145 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -40,7 +40,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) { - hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn); + hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR); } int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev) @@ -121,7 +121,8 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { - hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index); + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index, + BITMAP_NO_RR); } int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e86dd8d06777..f036f32f15d3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -37,7 +37,7 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) @@ -250,7 +250,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, if (base_qpn < SQP_NUM) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); + hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR); } static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h deleted file mode 100644 index a28f761a9f65..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_user.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Hisilicon Limited. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _HNS_ROCE_USER_H -#define _HNS_ROCE_USER_H - -struct hns_roce_ib_create_cq { - __u64 buf_addr; -}; - -struct hns_roce_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; - __u8 log_sq_bb_count; - __u8 log_sq_stride; - __u8 sq_no_prefetch; - __u8 reserved[5]; -}; - -struct hns_roce_ib_alloc_ucontext_resp { - __u32 qp_tab_size; -}; - -#endif /*_HNS_ROCE_USER_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8ec09e470f84..da2eb5a281fa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -112,9 +112,12 @@ #define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) -#define IW_CFG_FPM_QP_COUNT 32768 -#define I40IW_MAX_PAGES_PER_FMR 512 -#define I40IW_MIN_PAGES_PER_FMR 1 +#define IW_CFG_FPM_QP_COUNT 32768 +#define I40IW_MAX_PAGES_PER_FMR 512 +#define I40IW_MIN_PAGES_PER_FMR 1 +#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2 +#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3 +#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 @@ -210,6 +213,12 @@ struct i40iw_msix_vector { u32 ceq_id; }; +struct l2params_work { + struct work_struct work; + struct i40iw_device *iwdev; + struct i40iw_l2params l2params; +}; + #define I40IW_MSIX_TABLE_SIZE 65 struct virtchnl_work { @@ -227,6 +236,7 @@ struct i40iw_device { struct net_device *netdev; wait_queue_head_t vchnl_waitq; struct i40iw_sc_dev sc_dev; + struct i40iw_sc_vsi vsi; struct i40iw_handler *hdl; struct i40e_info *ldev; struct i40e_client *client; @@ -280,7 +290,6 @@ struct i40iw_device { u32 sd_type; struct workqueue_struct *param_wq; atomic_t params_busy; - u32 mss; enum init_completion_state init_state; u16 mac_ip_table_idx; atomic_t vchnl_msgs; @@ -297,6 +306,14 @@ struct i40iw_device { u32 mr_stagmask; u32 mpa_version; bool dcb; + bool closing; + bool reset; + u32 used_pds; + u32 used_cqs; + u32 used_mrs; + u32 used_qps; + wait_queue_head_t close_wq; + atomic64_t use_count; }; struct i40iw_ib_device { @@ -498,7 +515,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev); int i40iw_register_rdma_device(struct i40iw_device *iwdev); void i40iw_port_ibevent(struct i40iw_device *iwdev); -int i40iw_cm_disconn(struct i40iw_qp *); +void i40iw_cm_disconn(struct i40iw_qp *iwqp); void i40iw_cm_disconn_worker(void *); int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *, struct sk_buff *); @@ -508,20 +525,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev, u8 *mac_addr, u8 *mac_index); int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); +void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq); void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev); void i40iw_add_pdusecount(struct i40iw_pd *iwpd); +void i40iw_rem_devusecount(struct i40iw_device *iwdev); +void i40iw_add_devusecount(struct i40iw_device *iwdev); void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, struct i40iw_modify_qp_info *info, bool wait); +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, + struct i40iw_sc_qp *qp, + bool suspend); enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, enum i40iw_quad_entry_type etype, enum i40iw_quad_hash_manage_type mtype, void *cmnode, bool wait); -void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf); -void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp); +void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); +void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); void i40iw_free_qp_resources(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, u32 qp_num); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 85637696f6e9..95a0586a4da8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -68,13 +68,13 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 - * @dev: FPK device + * @vsi: pointer to vsi structure * @buf: puda buffer to free */ -void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp) +void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp; - struct i40iw_puda_rsrc *ilq = dev->ilq; + struct i40iw_puda_rsrc *ilq = vsi->ilq; if (!atomic_dec_return(&buf->refcount)) i40iw_puda_ret_bufpool(ilq, buf); @@ -221,6 +221,7 @@ static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node, memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr)); cm_info->loc_port = cm_node->loc_port; cm_info->rem_port = cm_node->rem_port; + cm_info->user_pri = cm_node->user_pri; } /** @@ -271,6 +272,7 @@ static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node, event.provider_data = (void *)cm_node; event.private_data = (void *)cm_node->pdata_buf; event.private_data_len = (u8)cm_node->pdata.size; + event.ird = cm_node->ird_size; break; case IW_CM_EVENT_CONNECT_REPLY: i40iw_get_cmevent_info(cm_node, cm_id, &event); @@ -335,13 +337,13 @@ static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node, */ static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node) { - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_device *iwdev = cm_node->iwdev; struct i40iw_timer_entry *send_entry; send_entry = cm_node->send_entry; if (send_entry) { cm_node->send_entry = NULL; - i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf); + i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf); kfree(send_entry); atomic_dec(&cm_node->ref_count); } @@ -360,15 +362,6 @@ static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } -static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node) -{ - if ((cm_node->rem_mac[0] == 0x0) && - (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) || - ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43))))) - return true; - return false; -} - /** * i40iw_form_cm_frame - get a free packet and build frame * @cm_node: connection's node ionfo to use in frame @@ -384,7 +377,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, u8 flags) { struct i40iw_puda_buf *sqbuf; - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; @@ -396,8 +389,9 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; + u16 vtag; - sqbuf = i40iw_puda_get_bufpool(dev->ilq); + sqbuf = i40iw_puda_get_bufpool(vsi->ilq); if (!sqbuf) return NULL; buf = sqbuf->mem.va; @@ -408,11 +402,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, if (hdr) hdr_len = hdr->size; - if (pdata) { + if (pdata) pd_len = pdata->size; - if (!is_remote_ne020_or_chelsio(cm_node)) - pd_len += MPA_ZERO_PAD_LEN; - } if (cm_node->vlan_id < VLAN_TAG_PRESENT) eth_hlen += 4; @@ -445,7 +436,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_source, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_TAG_PRESENT) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); - ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id); + vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; + ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP); } else { @@ -454,7 +446,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, iph->version = IPVERSION; iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ - iph->tos = 0; + iph->tos = cm_node->tos; iph->tot_len = htons(packetsize); iph->id = htons(++cm_node->tcp_cntxt.loc_id); @@ -474,13 +466,15 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_source, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_TAG_PRESENT) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); - ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id); + vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; + ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6); } else { ethh->h_proto = htons(ETH_P_IPV6); } ip6h->version = 6; - ip6h->flow_lbl[0] = 0; + ip6h->priority = cm_node->tos >> 4; + ip6h->flow_lbl[0] = cm_node->tos << 4; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->payload_len = htons(packetsize - sizeof(*ip6h)); @@ -1065,7 +1059,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, int send_retrans, int close_when_complete) { - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; struct i40iw_cm_core *cm_core = cm_node->cm_core; struct i40iw_timer_entry *new_send; int ret = 0; @@ -1074,7 +1068,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { - i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf); + i40iw_free_sqbuf(vsi, (void *)sqbuf); return -ENOMEM; } new_send->retrycount = I40IW_DEFAULT_RETRYS; @@ -1089,7 +1083,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { kfree(new_send); - i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf); + i40iw_free_sqbuf(vsi, (void *)sqbuf); i40iw_pr_err("already close entry\n"); return -EINVAL; } @@ -1104,7 +1098,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT; atomic_inc(&sqbuf->refcount); - i40iw_puda_send_buf(dev->ilq, sqbuf); + i40iw_puda_send_buf(vsi->ilq, sqbuf); if (!send_retrans) { i40iw_cleanup_retrans_entry(cm_node); if (close_when_complete) @@ -1201,6 +1195,7 @@ static void i40iw_cm_timer_tick(unsigned long pass) struct i40iw_cm_node *cm_node; struct i40iw_timer_entry *send_entry, *close_entry; struct list_head *list_core_temp; + struct i40iw_sc_vsi *vsi; struct list_head *list_node; struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass; u32 settimer = 0; @@ -1276,9 +1271,10 @@ static void i40iw_cm_timer_tick(unsigned long pass) cm_node->cm_core->stats_pkt_retrans++; spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + vsi = &cm_node->iwdev->vsi; dev = cm_node->dev; atomic_inc(&send_entry->sqbuf->refcount); - i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf); + i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; @@ -1379,10 +1375,11 @@ int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack) static void i40iw_send_ack(struct i40iw_cm_node *cm_node) { struct i40iw_puda_buf *sqbuf; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK); if (sqbuf) - i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf); + i40iw_puda_send_buf(vsi->ilq, sqbuf); else i40iw_pr_err("no sqbuf\n"); } @@ -1564,9 +1561,15 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); cm_info->vlan_id = child_listen_node->vlan_id; - ret = i40iw_manage_qhash(iwdev, cm_info, - I40IW_QHASH_TYPE_TCP_SYN, - I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false); + if (child_listen_node->qhash_set) { + ret = i40iw_manage_qhash(iwdev, cm_info, + I40IW_QHASH_TYPE_TCP_SYN, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, false); + child_listen_node->qhash_set = false; + } else { + ret = I40IW_SUCCESS; + } i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n", @@ -1591,9 +1594,10 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { struct net_device *ip_dev = NULL; -#if IS_ENABLED(CONFIG_IPV6) struct in6_addr laddr6; + if (!IS_ENABLED(CONFIG_IPV6)) + return NULL; i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) *vlan_id = I40IW_NO_VLAN; @@ -1610,7 +1614,6 @@ static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *ma } } rcu_read_unlock(); -#endif return ip_dev; } @@ -1646,7 +1649,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, { struct net_device *ip_dev; struct inet6_dev *idev; - struct inet6_ifaddr *ifp; + struct inet6_ifaddr *ifp, *tmp; enum i40iw_status_code ret = 0; struct i40iw_cm_listener *child_listen_node; unsigned long flags; @@ -1661,7 +1664,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, i40iw_pr_err("idev == NULL\n"); break; } - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI6, vlan_id=%d, MAC=%pM\n", @@ -1675,7 +1678,6 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, "Allocating child listener %p\n", child_listen_node); if (!child_listen_node) { - i40iw_pr_err("listener memory allocation\n"); ret = I40IW_ERR_NO_MEMORY; goto exit; } @@ -1695,6 +1697,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true); if (!ret) { + child_listen_node->qhash_set = true; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_add(&child_listen_node->child_listen_list, &cm_parent_listen_node->child_listen_list); @@ -1751,7 +1754,6 @@ static enum i40iw_status_code i40iw_add_mqh_4( "Allocating child listener %p\n", child_listen_node); if (!child_listen_node) { - i40iw_pr_err("listener memory allocation\n"); in_dev_put(idev); ret = I40IW_ERR_NO_MEMORY; goto exit; @@ -1773,6 +1775,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( NULL, true); if (!ret) { + child_listen_node->qhash_set = true; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_add(&child_listen_node->child_listen_list, &cm_parent_listen_node->child_listen_list); @@ -1880,6 +1883,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, nfo.loc_port = listener->loc_port; nfo.ipv4 = listener->ipv4; nfo.vlan_id = listener->vlan_id; + nfo.user_pri = listener->user_pri; if (!list_empty(&listener->child_listen_list)) { i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener); @@ -2138,6 +2142,20 @@ static struct i40iw_cm_node *i40iw_make_cm_node( /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; + if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb) + cm_node->vlan_id = 0; + cm_node->tos = cm_info->tos; + cm_node->user_pri = cm_info->user_pri; + if (listener) { + if (listener->tos != cm_info->tos) + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, + "application TOS[%d] and remote client TOS[%d] mismatch\n", + listener->tos, cm_info->tos); + cm_node->tos = max(listener->tos, cm_info->tos); + cm_node->user_pri = rt_tos2priority(cm_node->tos); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n", + cm_node->tos, cm_node->user_pri); + } memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr)); memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr)); cm_node->loc_port = cm_info->loc_port; @@ -2162,7 +2180,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node( I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = iwdev->mss; + cm_node->tcp_cntxt.mss = iwdev->vsi.mss; cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; @@ -2236,7 +2254,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && - cm_node->apbvt_set && cm_node->iwdev) { + cm_node->apbvt_set) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2861,7 +2879,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /* create a CM connection node */ cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; @@ -2874,7 +2892,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node( cm_node->vlan_id, I40IW_CM_LISTENER_ACTIVE_STATE); if (!loopback_remotelistener) { - i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED); + i40iw_rem_ref_cm_node(cm_node); + return ERR_PTR(-ECONNREFUSED); } else { loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; @@ -2887,7 +2906,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( loopback_remotelistener); if (!loopback_remotenode) { i40iw_rem_ref_cm_node(cm_node); - return NULL; + return ERR_PTR(-ENOMEM); } cm_core->stats_loopbacks++; loopback_remotenode->loopbackpartner = cm_node; @@ -3041,10 +3060,10 @@ static int i40iw_cm_close(struct i40iw_cm_node *cm_node) /** * i40iw_receive_ilq - recv an ETHERNET packet, and process it * through CM - * @dev: FPK dev struct + * @vsi: pointer to the vsi structure * @rbuf: receive buffer */ -void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) +void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf) { struct i40iw_cm_node *cm_node; struct i40iw_cm_listener *listener; @@ -3052,9 +3071,11 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) struct ipv6hdr *ip6h; struct tcphdr *tcph; struct i40iw_cm_info cm_info; + struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; struct i40iw_cm_core *cm_core = &iwdev->cm_core; struct vlan_ethhdr *ethh; + u16 vtag; /* if vlan, then maclen = 18 else 14 */ iph = (struct iphdr *)rbuf->iph; @@ -3068,7 +3089,9 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) ethh = (struct vlan_ethhdr *)rbuf->mem.va; if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) { - cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK; + vtag = ntohs(ethh->h_vlan_TCI); + cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + cm_info.vlan_id = vtag & VLAN_VID_MASK; i40iw_debug(cm_core->dev, I40IW_DEBUG_CM, "%s vlan_id=%d\n", @@ -3083,6 +3106,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) cm_info.loc_addr[0] = ntohl(iph->daddr); cm_info.rem_addr[0] = ntohl(iph->saddr); cm_info.ipv4 = true; + cm_info.tos = iph->tos; } else { ip6h = (struct ipv6hdr *)rbuf->iph; i40iw_copy_ip_ntohl(cm_info.loc_addr, @@ -3090,6 +3114,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) i40iw_copy_ip_ntohl(cm_info.rem_addr, ip6h->saddr.in6_u.u6_addr32); cm_info.ipv4 = false; + cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4); } cm_info.loc_port = ntohs(tcph->dest); cm_info.rem_port = ntohs(tcph->source); @@ -3309,6 +3334,8 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp, ctx_info->tcp_info_valid = true; ctx_info->iwarp_info_valid = true; + ctx_info->add_to_qoslist = true; + ctx_info->user_pri = cm_node->user_pri; i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp); if (cm_node->snd_mark_en) { @@ -3320,33 +3347,47 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp, cm_node->state = I40IW_CM_STATE_OFFLOADED; tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED; tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx; + tcp_info.tos = cm_node->tos; dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info); /* once tcp_info is set, no need to do it again */ ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = false; + ctx_info->add_to_qoslist = false; } /** * i40iw_cm_disconn - when a connection is being closed * @iwqp: associate qp for the connection */ -int i40iw_cm_disconn(struct i40iw_qp *iwqp) +void i40iw_cm_disconn(struct i40iw_qp *iwqp) { struct disconn_work *work; struct i40iw_device *iwdev = iwqp->iwdev; struct i40iw_cm_core *cm_core = &iwdev->cm_core; + unsigned long flags; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return -ENOMEM; /* Timer will clean up */ - + return; /* Timer will clean up */ + + spin_lock_irqsave(&iwdev->qptable_lock, flags); + if (!iwdev->qp_table[iwqp->ibqp.qp_num]) { + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, + "%s qp_id %d is already freed\n", + __func__, iwqp->ibqp.qp_num); + kfree(work); + return; + } i40iw_add_ref(&iwqp->ibqp); + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); + work->iwqp = iwqp; INIT_WORK(&work->work, i40iw_disconnect_worker); queue_work(cm_core->disconn_wq, &work->work); - return 0; + return; } /** @@ -3432,7 +3473,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp) *terminate-handler to issue cm_disconn which can re-free *a QP even after its refcnt=0. */ - del_timer(&iwqp->terminate_timer); + i40iw_terminate_del_timer(qp); if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; @@ -3462,7 +3503,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp) /* Flush the queues */ i40iw_flush_wqes(iwdev, iwqp); - if (qp->term_flags) { + if (qp->term_flags && iwqp->ibqp.event_handler) { ibevent.device = iwqp->ibqp.device; ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ? IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR; @@ -3571,7 +3612,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_node = (void *)cm_node; cm_node->iwqp = iwqp; - buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN; + buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE; status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1); @@ -3605,18 +3646,10 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->lsmm_mr = ibmr; if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - if (is_remote_ne020_or_chelsio(cm_node)) - dev->iw_priv_qp_ops->qp_send_lsmm( - &iwqp->sc_qp, + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, (accept.size + conn_param->private_data_len), ibmr->lkey); - else - dev->iw_priv_qp_ops->qp_send_lsmm( - &iwqp->sc_qp, - iwqp->ietf_mem.va, - (accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN), - ibmr->lkey); } else { if (iwqp->page) @@ -3714,6 +3747,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in6 *raddr6; bool qhash_set = false; int apbvt_set = 0; + int err = 0; enum i40iw_status_code status; ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); @@ -3759,6 +3793,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } cm_info.cm_id = cm_id; + cm_info.tos = cm_id->tos; + cm_info.user_pri = rt_tos2priority(cm_id->tos); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n", + __func__, cm_id->tos, cm_info.user_pri); if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) || (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32, raddr6->sin6_addr.in6_u.u6_addr32, @@ -3790,8 +3828,11 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data_len, (void *)conn_param->private_data, &cm_info); - if (!cm_node) - goto err; + + if (IS_ERR(cm_node)) { + err = PTR_ERR(cm_node); + goto err_out; + } i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && @@ -3805,10 +3846,12 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_id = cm_id; i40iw_add_ref(&iwqp->ibqp); - if (cm_node->state == I40IW_CM_STATE_SYN_SENT) { - if (i40iw_send_syn(cm_node, 0)) { + if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { + cm_node->state = I40IW_CM_STATE_SYN_SENT; + err = i40iw_send_syn(cm_node, 0); + if (err) { i40iw_rem_ref_cm_node(cm_node); - goto err; + goto err_out; } } @@ -3820,24 +3863,25 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->cm_id); return 0; -err: - if (cm_node) { - if (cm_node->ipv4) - i40iw_debug(cm_node->dev, - I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI4", - cm_node->rem_addr); - else - i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI6", - cm_node->rem_addr); - } - i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); +err_out: + if (cm_info.ipv4) + i40iw_debug(&iwdev->sc_dev, + I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI4", + cm_info.rem_addr); + else + i40iw_debug(&iwdev->sc_dev, + I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI6", + cm_info.rem_addr); + + if (qhash_set) + i40iw_manage_qhash(iwdev, + &cm_info, + I40IW_QHASH_TYPE_TCP_ESTABLISHED, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, + false); if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, cm_info.loc_port)) @@ -3846,7 +3890,7 @@ err: I40IW_MANAGE_APBVT_DEL); cm_id->rem_ref(cm_id); iwdev->cm_core.stats_connect_errs++; - return -ENOMEM; + return err; } /** @@ -3904,6 +3948,10 @@ int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_listen_node; + cm_listen_node->tos = cm_id->tos; + cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); + cm_info.user_pri = cm_listen_node->user_pri; + if (!cm_listen_node->reused_node) { if (wildcard) { if (cm_info.ipv4) @@ -4124,3 +4172,158 @@ static void i40iw_cm_post_event(struct i40iw_cm_event *event) queue_work(event->cm_node->cm_core->event_wq, &event->event_work); } + +/** + * i40iw_qhash_ctrl - enable/disable qhash for list + * @iwdev: device pointer + * @parent_listen_node: parent listen node + * @nfo: cm info node + * @ipaddr: Pointer to IPv4 or IPv6 address + * @ipv4: flag indicating IPv4 when true + * @ifup: flag indicating interface up when true + * + * Enables or disables the qhash for the node in the child + * listen list that matches ipaddr. If no matching IP was found + * it will allocate and add a new child listen node to the + * parent listen node. The listen_list_lock is assumed to be + * held when called. + */ +static void i40iw_qhash_ctrl(struct i40iw_device *iwdev, + struct i40iw_cm_listener *parent_listen_node, + struct i40iw_cm_info *nfo, + u32 *ipaddr, bool ipv4, bool ifup) +{ + struct list_head *child_listen_list = &parent_listen_node->child_listen_list; + struct i40iw_cm_listener *child_listen_node; + struct list_head *pos, *tpos; + enum i40iw_status_code ret; + bool node_allocated = false; + enum i40iw_quad_hash_manage_type op = + ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE; + + list_for_each_safe(pos, tpos, child_listen_list) { + child_listen_node = + list_entry(pos, + struct i40iw_cm_listener, + child_listen_list); + if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16)) + goto set_qhash; + } + + /* if not found then add a child listener if interface is going up */ + if (!ifup) + return; + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); + if (!child_listen_node) + return; + node_allocated = true; + memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node)); + + memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16); + +set_qhash: + memcpy(nfo->loc_addr, + child_listen_node->loc_addr, + sizeof(nfo->loc_addr)); + nfo->vlan_id = child_listen_node->vlan_id; + ret = i40iw_manage_qhash(iwdev, nfo, + I40IW_QHASH_TYPE_TCP_SYN, + op, + NULL, false); + if (!ret) { + child_listen_node->qhash_set = ifup; + if (node_allocated) + list_add(&child_listen_node->child_listen_list, + &parent_listen_node->child_listen_list); + } else if (node_allocated) { + kfree(child_listen_node); + } +} + +/** + * i40iw_cm_disconnect_all - disconnect all connected qp's + * @iwdev: device pointer + */ +void i40iw_cm_disconnect_all(struct i40iw_device *iwdev) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + struct list_head *list_core_temp; + struct list_head *list_node; + struct i40iw_cm_node *cm_node; + unsigned long flags; + struct list_head connected_list; + struct ib_qp_attr attr; + + INIT_LIST_HEAD(&connected_list); + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + cm_node = container_of(list_node, struct i40iw_cm_node, list); + atomic_inc(&cm_node->ref_count); + list_add(&cm_node->connected_entry, &connected_list); + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &connected_list) { + cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_rem_ref_cm_node(cm_node); + } +} + +/** + * i40iw_ifdown_notify - process an ifdown on an interface + * @iwdev: device pointer + * @ipaddr: Pointer to IPv4 or IPv6 address + * @ipv4: flag indicating IPv4 when true + * @ifup: flag indicating interface up when true + */ +void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, + u32 *ipaddr, bool ipv4, bool ifup) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + unsigned long flags; + struct i40iw_cm_listener *listen_node; + static const u32 ip_zero[4] = { 0, 0, 0, 0 }; + struct i40iw_cm_info nfo; + u16 vlan_id = rdma_vlan_dev_vlan_id(netdev); + enum i40iw_status_code ret; + enum i40iw_quad_hash_manage_type op = + ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE; + + /* Disable or enable qhash for listeners */ + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { + if (vlan_id == listen_node->vlan_id && + (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) || + !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) { + memcpy(nfo.loc_addr, listen_node->loc_addr, + sizeof(nfo.loc_addr)); + nfo.loc_port = listen_node->loc_port; + nfo.ipv4 = listen_node->ipv4; + nfo.vlan_id = listen_node->vlan_id; + nfo.user_pri = listen_node->user_pri; + if (!list_empty(&listen_node->child_listen_list)) { + i40iw_qhash_ctrl(iwdev, + listen_node, + &nfo, + ipaddr, ipv4, ifup); + } else if (memcmp(listen_node->loc_addr, ip_zero, + ipv4 ? 4 : 16)) { + ret = i40iw_manage_qhash(iwdev, + &nfo, + I40IW_QHASH_TYPE_TCP_SYN, + op, + NULL, + false); + if (!ret) + listen_node->qhash_set = ifup; + } + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + /* disconnect any connected qp's on ifdown */ + if (!ifup) + i40iw_cm_disconnect_all(iwdev); +} diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index e9046d9f9645..2e52e38ffcf3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -56,8 +56,6 @@ #define I40IW_MAX_IETF_SIZE 32 -#define MPA_ZERO_PAD_LEN 4 - /* IETF RTR MSG Fields */ #define IETF_PEER_TO_PEER 0x8000 #define IETF_FLPDU_ZERO_LEN 0x4000 @@ -299,6 +297,7 @@ struct i40iw_cm_listener { enum i40iw_cm_listener_state listener_state; u32 reused_node; u8 user_pri; + u8 tos; u16 vlan_id; bool qhash_set; bool ipv4; @@ -341,9 +340,11 @@ struct i40iw_cm_node { int accept_pend; struct list_head timer_entry; struct list_head reset_entry; + struct list_head connected_entry; atomic_t passive_state; bool qhash_set; u8 user_pri; + u8 tos; bool ipv4; bool snd_mark_en; u16 lsmm_size; @@ -368,7 +369,8 @@ struct i40iw_cm_info { u32 rem_addr[4]; u16 vlan_id; int backlog; - u16 user_pri; + u8 user_pri; + u8 tos; bool ipv4; }; @@ -445,4 +447,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev, u8 *mac_addr, u32 action); +void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, + u32 *ipaddr, bool ipv4, bool ifup); +void i40iw_cm_disconnect_all(struct i40iw_device *iwdev); #endif /* I40IW_CM_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 2c4b4d072d6a..392f78384a60 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -103,6 +103,7 @@ static enum i40iw_status_code i40iw_cqp_poll_registers( if (newtail != tail) { /* SUCCESS */ I40IW_RING_MOVE_TAIL(cqp->sq_ring); + cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++; return 0; } udelay(I40IW_SLEEP_COUNT); @@ -223,6 +224,136 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( } /** + * i40iw_fill_qos_list - Change all unknown qs handles to available ones + * @qs_list: list of qs_handles to be fixed with valid qs_handles + */ +static void i40iw_fill_qos_list(u16 *qs_list) +{ + u16 qshandle = qs_list[0]; + int i; + + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + if (qs_list[i] == QS_HANDLE_UNKNOWN) + qs_list[i] = qshandle; + else + qshandle = qs_list[i]; + } +} + +/** + * i40iw_qp_from_entry - Given entry, get to the qp structure + * @entry: Points to list of qp structure + */ +static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry) +{ + if (!entry) + return NULL; + + return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list)); +} + +/** + * i40iw_get_qp - get the next qp from the list given current qp + * @head: Listhead of qp's + * @qp: current qp + */ +static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp) +{ + struct list_head *entry = NULL; + struct list_head *lastentry; + + if (list_empty(head)) + return NULL; + + if (!qp) { + entry = head->next; + } else { + lastentry = &qp->list; + entry = (lastentry != head) ? lastentry->next : NULL; + } + + return i40iw_qp_from_entry(entry); +} + +/** + * i40iw_change_l2params - given the new l2 parameters, change all qp + * @vsi: pointer to the vsi structure + * @l2params: New paramaters from l2 + */ +void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params) +{ + struct i40iw_sc_dev *dev = vsi->dev; + struct i40iw_sc_qp *qp = NULL; + bool qs_handle_change = false; + bool mss_change = false; + unsigned long flags; + u16 qs_handle; + int i; + + if (vsi->mss != l2params->mss) { + mss_change = true; + vsi->mss = l2params->mss; + } + + i40iw_fill_qos_list(l2params->qs_handle_list); + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + qs_handle = l2params->qs_handle_list[i]; + if (vsi->qos[i].qs_handle != qs_handle) + qs_handle_change = true; + else if (!mss_change) + continue; /* no MSS nor qs handle change */ + spin_lock_irqsave(&vsi->qos[i].lock, flags); + qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); + while (qp) { + if (mss_change) + i40iw_qp_mss_modify(dev, qp); + if (qs_handle_change) { + qp->qs_handle = qs_handle; + /* issue cqp suspend command */ + i40iw_qp_suspend_resume(dev, qp, true); + } + qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); + } + spin_unlock_irqrestore(&vsi->qos[i].lock, flags); + vsi->qos[i].qs_handle = qs_handle; + } +} + +/** + * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp + * @qp: qp to be removed from qos + */ +static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp) +{ + struct i40iw_sc_vsi *vsi = qp->vsi; + unsigned long flags; + + if (!qp->on_qoslist) + return; + spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags); + list_del(&qp->list); + spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags); +} + +/** + * i40iw_qp_add_qos - called during setctx fot qp to be added to qos + * @qp: qp to be added to qos + */ +void i40iw_qp_add_qos(struct i40iw_sc_qp *qp) +{ + struct i40iw_sc_vsi *vsi = qp->vsi; + unsigned long flags; + + if (qp->on_qoslist) + return; + spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags); + qp->qs_handle = vsi->qos[qp->user_pri].qs_handle; + list_add(&qp->list, &vsi->qos[qp->user_pri].qplist); + qp->on_qoslist = true; + spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags); +} + +/** * i40iw_sc_pd_init - initialize sc pd struct * @dev: sc device struct * @pd: sc pd ptr @@ -292,6 +423,9 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, info->dev->cqp = cqp; I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size); + cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0; + cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0; + i40iw_debug(cqp->dev, I40IW_DEBUG_WQE, "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n", __func__, cqp->sq_size, cqp->hw_sq_size, @@ -302,12 +436,10 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, /** * i40iw_sc_cqp_create - create cqp during bringup * @cqp: struct for cqp hw - * @disable_pfpdus: if pfpdu to be disabled * @maj_err: If error, major err number * @min_err: If error, minor err number */ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp, - bool disable_pfpdus, u16 *maj_err, u16 *min_err) { @@ -326,9 +458,6 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp, temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) | LS_64(cqp->struct_ver, I40IW_CQPHC_SVER); - if (disable_pfpdus) - temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS); - set_64bit_val(cqp->host_ctx, 0, temp); set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa); temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) | @@ -424,6 +553,7 @@ u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch) return NULL; } I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code); + cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++; if (ret_code) return NULL; if (!wqe_idx) @@ -559,6 +689,8 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring)); wmb(); /* write shadow area before tail */ I40IW_RING_MOVE_TAIL(cqp->sq_ring); + ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++; + return ret_code; } @@ -1051,6 +1183,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry( u64 qw1 = 0; u64 qw2 = 0; u64 temp; + struct i40iw_sc_vsi *vsi = info->vsi; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -1082,7 +1215,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry( LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) | LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3)); } - qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE); + qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE); if (info->vlan_valid) qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID); set_64bit_val(wqe, 16, qw2); @@ -2103,6 +2236,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp, u32 offset; qp->dev = info->pd->dev; + qp->vsi = info->vsi; qp->sq_pa = info->sq_pa; qp->rq_pa = info->rq_pa; qp->hw_host_ctx_pa = info->host_ctx_pa; @@ -2151,7 +2285,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp, qp->rq_tph_en = info->rq_tph_en; qp->rcv_tph_en = info->rcv_tph_en; qp->xmit_tph_en = info->xmit_tph_en; - qp->qs_handle = qp->pd->dev->qs_handle; + qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; qp->exception_lan_queue = qp->pd->dev->exception_lan_queue; return 0; @@ -2296,6 +2430,7 @@ static enum i40iw_status_code i40iw_sc_qp_destroy( struct i40iw_sc_cqp *cqp; u64 header; + i40iw_qp_rem_qos(qp); cqp = qp->pd->dev->cqp; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -2443,10 +2578,20 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( { struct i40iwarp_offload_info *iw; struct i40iw_tcp_offload_info *tcp; + struct i40iw_sc_vsi *vsi; + struct i40iw_sc_dev *dev; u64 qw0, qw3, qw7 = 0; iw = info->iwarp_info; tcp = info->tcp_info; + vsi = qp->vsi; + dev = qp->dev; + if (info->add_to_qoslist) { + qp->user_pri = info->user_pri; + i40iw_qp_add_qos(qp); + i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n", + __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle); + } qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) | LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) | LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | @@ -2487,16 +2632,14 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER); qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX); - set_64bit_val(qp_ctx, 144, qp->q2_pa); + set_64bit_val(qp_ctx, + 144, + LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) | + LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX)); set_64bit_val(qp_ctx, 152, LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT)); - /* - * Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an - *advertisable IRD of 64 - */ - iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD; set_64bit_val(qp_ctx, 160, LS_64(iw->ord_size, I40IWQPC_ORDSIZE) | @@ -2507,6 +2650,9 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(iw->bind_en, I40IWQPC_BINDEN) | LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) | LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) | + LS_64((((vsi->stats_fcn_id_alloc) && + (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0), + I40IWQPC_USESTATSINSTANCE) | LS_64(1, I40IWQPC_IWARPMODE) | LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) | LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) | @@ -2623,7 +2769,9 @@ static enum i40iw_status_code i40iw_sc_alloc_stag( u64 *wqe; struct i40iw_sc_cqp *cqp; u64 header; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; cqp = dev->cqp; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -2643,7 +2791,7 @@ static enum i40iw_status_code i40iw_sc_alloc_stag( LS_64(1, I40IW_CQPSQ_STAG_MR) | LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) | LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) | - LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | + LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) | LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) | LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) | @@ -2679,7 +2827,9 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared( u32 pble_obj_cnt; bool remote_access; u8 addr_type; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY | I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY)) remote_access = true; @@ -2722,7 +2872,7 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared( header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) | LS_64(1, I40IW_CQPSQ_STAG_MR) | LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) | - LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | + LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) | LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) | LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) | @@ -2937,7 +3087,9 @@ enum i40iw_status_code i40iw_sc_mr_fast_register( u64 temp, header; u64 *wqe; u32 wqe_idx; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); if (!wqe) @@ -2964,7 +3116,7 @@ enum i40iw_status_code i40iw_sc_mr_fast_register( LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) | LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) | LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) | - LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) | + LS_64(page_size, I40IWQPSQ_HPAGESIZE) | LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) | LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) | LS_64(info->read_fence, I40IWQPSQ_READFENCE) | @@ -3959,7 +4111,7 @@ enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev, struct cqp_commands_info *pcmdinfo) { enum i40iw_status_code status = 0; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) @@ -3978,7 +4130,7 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev) { enum i40iw_status_code status = 0; struct cqp_commands_info *pcmdinfo; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) { @@ -4055,7 +4207,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, u16 ddp_seg_len; int copy_len = 0; u8 is_tagged = 0; - enum i40iw_flush_opcode flush_code = FLUSH_INVALID; u32 opcode; struct i40iw_terminate_hdr *termhdr; @@ -4228,9 +4379,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, if (copy_len) memcpy(termhdr + 1, pkt, copy_len); - if (flush_code && !info->in_rdrsp_wr) - qp->sq_flush = (info->sq) ? true : false; - return sizeof(struct i40iw_terminate_hdr) + copy_len; } @@ -4321,286 +4469,370 @@ void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *in } /** - * i40iw_hw_stat_init - Initiliaze HW stats table - * @devstat: pestat struct + * i40iw_sc_vsi_init - Initialize virtual device + * @vsi: pointer to the vsi structure + * @info: parameters to initialize vsi + **/ +void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info) +{ + int i; + + vsi->dev = info->dev; + vsi->back_vsi = info->back_vsi; + vsi->mss = info->params->mss; + i40iw_fill_qos_list(info->params->qs_handle_list); + + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + vsi->qos[i].qs_handle = + info->params->qs_handle_list[i]; + i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle); + spin_lock_init(&vsi->qos[i].lock); + INIT_LIST_HEAD(&vsi->qos[i].qplist); + } +} + +/** + * i40iw_hw_stats_init - Initiliaze HW stats table + * @stats: pestat struct * @fcn_idx: PCI fn id - * @hw: PF i40iw_hw structure. * @is_pf: Is it a PF? * - * Populate the HW stat table with register offset addr for each - * stat. And start the perioidic stats timer. + * Populate the HW stats table with register offset addr for each + * stats. And start the perioidic stats timer. */ -static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat, - u8 fcn_idx, - struct i40iw_hw *hw, bool is_pf) +void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) { - u32 stat_reg_offset; - u32 stat_index; - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - - devstat->hw = hw; + u32 stats_reg_offset; + u32 stats_index; + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; if (is_pf) { - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = I40E_GLPES_PFIP4RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = I40E_GLPES_PFIP4RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = I40E_GLPES_PFIP4TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = I40E_GLPES_PFIP6RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = I40E_GLPES_PFIP6RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = I40E_GLPES_PFIP6TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = I40E_GLPES_PFTCPRTXSEG(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = I40E_GLPES_PFTCPRXOPTERR(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = I40E_GLPES_PFTCPRXPROTOERR(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = I40E_GLPES_PFIP4RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = I40E_GLPES_PFIP4RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = I40E_GLPES_PFIP4TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = I40E_GLPES_PFIP4TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = I40E_GLPES_PFIP6RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = I40E_GLPES_PFIP6RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = I40E_GLPES_PFIP6TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_PFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_PFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = I40E_GLPES_PFTCPRXSEGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = I40E_GLPES_PFTCPTXSEGLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = I40E_GLPES_PFRDMARXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = I40E_GLPES_PFRDMARXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = I40E_GLPES_PFRDMARXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = I40E_GLPES_PFRDMATXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = I40E_GLPES_PFRDMATXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = I40E_GLPES_PFRDMATXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = I40E_GLPES_PFRDMAVBNDLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = I40E_GLPES_PFRDMAVINVLO(fcn_idx); } else { - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = I40E_GLPES_VFIP4RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = I40E_GLPES_VFIP4RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = I40E_GLPES_VFIP4TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = I40E_GLPES_VFIP6RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = I40E_GLPES_VFIP6RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = I40E_GLPES_VFIP6TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = I40E_GLPES_VFTCPRTXSEG(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = I40E_GLPES_VFTCPRXOPTERR(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = I40E_GLPES_VFTCPRXPROTOERR(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = I40E_GLPES_VFIP4RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = I40E_GLPES_VFIP4RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = I40E_GLPES_VFIP4TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = I40E_GLPES_VFIP4TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = I40E_GLPES_VFIP6RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = I40E_GLPES_VFIP6RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = I40E_GLPES_VFIP6TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_VFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_VFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = I40E_GLPES_VFTCPRXSEGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = I40E_GLPES_VFTCPTXSEGLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = I40E_GLPES_VFRDMARXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = I40E_GLPES_VFRDMARXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = I40E_GLPES_VFRDMARXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = I40E_GLPES_VFRDMATXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = I40E_GLPES_VFRDMATXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = I40E_GLPES_VFRDMATXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = I40E_GLPES_VFRDMAVBNDLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = I40E_GLPES_VFRDMAVINVLO(fcn_idx); } - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) { - stat_reg_offset = stat_table->stat_offset_64[stat_index]; - last_rd_stats->stat_value_64[stat_index] = - readq(devstat->hw->hw_addr + stat_reg_offset); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) { + stats_reg_offset = stats_table->stats_offset_64[stats_index]; + last_rd_stats->stats_value_64[stats_index] = + readq(stats->hw->hw_addr + stats_reg_offset); } - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) { - stat_reg_offset = stat_table->stat_offset_32[stat_index]; - last_rd_stats->stat_value_32[stat_index] = - i40iw_rd32(devstat->hw, stat_reg_offset); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) { + stats_reg_offset = stats_table->stats_offset_32[stats_index]; + last_rd_stats->stats_value_32[stats_index] = + i40iw_rd32(stats->hw, stats_reg_offset); } } /** - * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs. - * @devstat: pestat struct - * @index: index in HW stat table which contains offset reg-addr - * @value: hw stat value + * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. + * @stat: pestat struct + * @index: index in HW stats table which contains offset reg-addr + * @value: hw stats value */ -static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat, - enum i40iw_hw_stat_index_32b index, - u64 *value) +void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_32b index, + u64 *value) { - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - u64 new_stat_value = 0; - u32 stat_reg_offset = stat_table->stat_offset_32[index]; - - new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset); + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; + struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats; + u64 new_stats_value = 0; + u32 stats_reg_offset = stats_table->stats_offset_32[index]; + + new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset); /*roll-over case */ - if (new_stat_value < last_rd_stats->stat_value_32[index]) - hw_stats->stat_value_32[index] += new_stat_value; + if (new_stats_value < last_rd_stats->stats_value_32[index]) + hw_stats->stats_value_32[index] += new_stats_value; else - hw_stats->stat_value_32[index] += - new_stat_value - last_rd_stats->stat_value_32[index]; - last_rd_stats->stat_value_32[index] = new_stat_value; - *value = hw_stats->stat_value_32[index]; + hw_stats->stats_value_32[index] += + new_stats_value - last_rd_stats->stats_value_32[index]; + last_rd_stats->stats_value_32[index] = new_stats_value; + *value = hw_stats->stats_value_32[index]; } /** - * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs. - * @devstat: pestat struct - * @index: index in HW stat table which contains offset reg-addr - * @value: hw stat value + * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodates for roll-overs. + * @stats: pestat struct + * @index: index in HW stats table which contains offset reg-addr + * @value: hw stats value */ -static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat, - enum i40iw_hw_stat_index_64b index, - u64 *value) +void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_64b index, + u64 *value) { - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - u64 new_stat_value = 0; - u32 stat_reg_offset = stat_table->stat_offset_64[index]; - - new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset); + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; + struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats; + u64 new_stats_value = 0; + u32 stats_reg_offset = stats_table->stats_offset_64[index]; + + new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset); /*roll-over case */ - if (new_stat_value < last_rd_stats->stat_value_64[index]) - hw_stats->stat_value_64[index] += new_stat_value; + if (new_stats_value < last_rd_stats->stats_value_64[index]) + hw_stats->stats_value_64[index] += new_stats_value; else - hw_stats->stat_value_64[index] += - new_stat_value - last_rd_stats->stat_value_64[index]; - last_rd_stats->stat_value_64[index] = new_stat_value; - *value = hw_stats->stat_value_64[index]; + hw_stats->stats_value_64[index] += + new_stats_value - last_rd_stats->stats_value_64[index]; + last_rd_stats->stats_value_64[index] = new_stats_value; + *value = hw_stats->stats_value_64[index]; } /** - * i40iw_hw_stat_read_all - read all HW stat counters - * @devstat: pestat struct - * @stat_values: hw stats structure + * i40iw_hw_stats_read_all - read all HW stat counters + * @stats: pestat struct + * @stats_values: hw stats structure * * Read all the HW stat counters and populates hw_stats structure - * of passed-in dev's pestat as well as copy created in stat_values. + * of passed-in vsi's pestat as well as copy created in stat_values. */ -static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat, - struct i40iw_dev_hw_stats *stat_values) +void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, + struct i40iw_dev_hw_stats *stats_values) { - u32 stat_index; - - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) - i40iw_hw_stat_read_32(devstat, stat_index, - &stat_values->stat_value_32[stat_index]); - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) - i40iw_hw_stat_read_64(devstat, stat_index, - &stat_values->stat_value_64[stat_index]); + u32 stats_index; + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) + i40iw_hw_stats_read_32(stats, stats_index, + &stats_values->stats_value_32[stats_index]); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) + i40iw_hw_stats_read_64(stats, stats_index, + &stats_values->stats_value_64[stats_index]); + spin_unlock_irqrestore(&stats->lock, flags); } /** - * i40iw_hw_stat_refresh_all - Update all HW stat structs - * @devstat: pestat struct - * @stat_values: hw stats structure + * i40iw_hw_stats_refresh_all - Update all HW stats structs + * @stats: pestat struct * - * Read all the HW stat counters to refresh values in hw_stats structure + * Read all the HW stats counters to refresh values in hw_stats structure * of passed-in dev's pestat */ -static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat) +void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats) +{ + u64 stats_value; + u32 stats_index; + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) + i40iw_hw_stats_read_32(stats, stats_index, &stats_value); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) + i40iw_hw_stats_read_64(stats, stats_index, &stats_value); + spin_unlock_irqrestore(&stats->lock, flags); +} + +/** + * i40iw_get_fcn_id - Return the function id + * @dev: pointer to the device + */ +static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev) +{ + u8 fcn_id = I40IW_INVALID_FCN_ID; + u8 i; + + for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++) + if (!dev->fcn_id_array[i]) { + fcn_id = i; + dev->fcn_id_array[i] = true; + break; + } + return fcn_id; +} + +/** + * i40iw_vsi_stats_init - Initialize the vsi statistics + * @vsi: pointer to the vsi structure + * @info: The info structure used for initialization + */ +enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info) { - u64 stat_value; - u32 stat_index; - - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) - i40iw_hw_stat_read_32(devstat, stat_index, &stat_value); - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) - i40iw_hw_stat_read_64(devstat, stat_index, &stat_value); + u8 fcn_id = info->fcn_id; + + if (info->alloc_fcn_id) + fcn_id = i40iw_get_fcn_id(vsi->dev); + + if (fcn_id == I40IW_INVALID_FCN_ID) + return I40IW_ERR_NOT_READY; + + vsi->pestat = info->pestat; + vsi->pestat->hw = vsi->dev->hw; + + if (info->stats_initialize) { + i40iw_hw_stats_init(vsi->pestat, fcn_id, true); + spin_lock_init(&vsi->pestat->lock); + i40iw_hw_stats_start_timer(vsi); + } + vsi->stats_fcn_id_alloc = info->alloc_fcn_id; + vsi->fcn_id = fcn_id; + return I40IW_SUCCESS; +} + +/** + * i40iw_vsi_stats_free - Free the vsi stats + * @vsi: pointer to the vsi structure + */ +void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) +{ + u8 fcn_id = vsi->fcn_id; + + if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID)) + vsi->dev->fcn_id_array[fcn_id] = false; + i40iw_hw_stats_stop_timer(vsi); } static struct i40iw_cqp_ops iw_cqp_ops = { @@ -4711,24 +4943,6 @@ static struct i40iw_hmc_ops iw_hmc_ops = { NULL }; -static const struct i40iw_device_pestat_ops iw_device_pestat_ops = { - i40iw_hw_stat_init, - i40iw_hw_stat_read_32, - i40iw_hw_stat_read_64, - i40iw_hw_stat_read_all, - i40iw_hw_stat_refresh_all -}; - -/** - * i40iw_device_init_pestat - Initialize the pestat structure - * @dev: pestat struct - */ -enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat) -{ - devstat->ops = iw_device_pestat_ops; - return 0; -} - /** * i40iw_device_init - Initialize IWARP device * @dev: IWARP device pointer @@ -4750,14 +4964,7 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, dev->debug_mask = info->debug_mask; - ret_code = i40iw_device_init_pestat(&dev->dev_pestat); - if (ret_code) { - i40iw_debug(dev, I40IW_DEBUG_DEV, - "%s: i40iw_device_init_pestat failed\n", __func__); - return ret_code; - } dev->hmc_fn_id = info->hmc_fn_id; - dev->qs_handle = info->qs_handle; dev->exception_lan_queue = info->exception_lan_queue; dev->is_pf = info->is_pf; @@ -4770,15 +4977,10 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, dev->hw = info->hw; dev->hw->hw_addr = info->bar0; - val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID); - dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID); - if (dev->is_pf) { - dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat, - dev->hmc_fn_id, dev->hw, true); - spin_lock_init(&dev->dev_pestat.stats_lock); - /*start the periodic stats_timer */ - i40iw_hw_stats_start_timer(dev); + val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID); + dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID); + val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL); db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE); if ((db_size != I40IW_PE_DB_SIZE_4M) && diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 2fac1db0e0a0..a39ac12b6a7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -35,6 +35,8 @@ #ifndef I40IW_D_H #define I40IW_D_H +#define I40IW_FIRST_USER_QP_ID 2 + #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) @@ -67,6 +69,9 @@ #define I40IW_STAG_TYPE_NONSHARED 1 #define I40IW_MAX_USER_PRIORITY 8 +#define I40IW_MAX_STATS_COUNT 16 +#define I40IW_FIRST_NON_PF_STAT 4 + #define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits) #define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits) @@ -74,6 +79,8 @@ #define RS_32_1(val, bits) (u32)(val >> bits) #define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF)) +#define QS_HANDLE_UNKNOWN 0xffff + #define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK)) #define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT) @@ -1199,8 +1206,11 @@ #define I40IWQPC_RXCQNUM_SHIFT 32 #define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT) -#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT -#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK +#define I40IWQPC_STAT_INDEX_SHIFT 0 +#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT) + +#define I40IWQPC_Q2ADDR_SHIFT 0 +#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT) #define I40IWQPC_LASTBYTESENT_SHIFT 0 #define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT) @@ -1232,11 +1242,8 @@ #define I40IWQPC_PRIVEN_SHIFT 25 #define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT) -#define I40IWQPC_LSMMPRESENT_SHIFT 26 -#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT) - -#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27 -#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT) +#define I40IWQPC_USESTATSINSTANCE_SHIFT 26 +#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT) #define I40IWQPC_IWARPMODE_SHIFT 28 #define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT) @@ -1713,6 +1720,8 @@ enum i40iw_alignment { #define OP_MANAGE_VF_PBLE_BP 28 #define OP_QUERY_FPM_VALUES 29 #define OP_COMMIT_FPM_VALUES 30 -#define OP_SIZE_CQP_STAT_ARRAY 31 +#define OP_REQUESTED_COMMANDS 31 +#define OP_COMPLETED_COMMANDS 32 +#define OP_SIZE_CQP_STAT_ARRAY 33 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 0c92a40b3e86..476867a3f584 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -62,7 +62,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev) max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt; arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt; iwdev->max_cqe = 0xFFFFF; - num_pds = max_qp * 4; + num_pds = I40IW_MAX_PDS; resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size; resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp); resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr); @@ -308,7 +308,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) iwqp = iwdev->qp_table[info->qp_cq_id]; if (!iwqp) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id); + i40iw_debug(dev, I40IW_DEBUG_AEQ, + "%s qp_id %d is already freed\n", + __func__, info->qp_cq_id); continue; } i40iw_add_ref(&iwqp->ibqp); @@ -359,6 +361,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) continue; i40iw_cm_disconn(iwqp); break; + case I40IW_AE_QP_SUSPEND_COMPLETE: + i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false); + break; case I40IW_AE_TERMINATE_SENT: i40iw_terminate_send_fin(qp); break; @@ -404,19 +409,18 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) case I40IW_AE_LCE_CQ_CATASTROPHIC: case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG: case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH: - case I40IW_AE_QP_SUSPEND_COMPLETE: ctx_info->err_rq_idx_valid = false; default: - if (!info->sq && ctx_info->err_rq_idx_valid) { - ctx_info->err_rq_idx = info->wqe_idx; - ctx_info->tcp_info_valid = false; - ctx_info->iwarp_info_valid = false; - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, - iwqp->host_ctx.va, - ctx_info); - } - i40iw_terminate_connection(qp, info); - break; + if (!info->sq && ctx_info->err_rq_idx_valid) { + ctx_info->err_rq_idx = info->wqe_idx; + ctx_info->tcp_info_valid = false; + ctx_info->iwarp_info_valid = false; + ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, + iwqp->host_ctx.va, + ctx_info); + } + i40iw_terminate_connection(qp, info); + break; } if (info->qp) i40iw_rem_ref(&iwqp->ibqp); @@ -538,6 +542,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, { struct i40iw_qhash_table_info *info; struct i40iw_sc_dev *dev = &iwdev->sc_dev; + struct i40iw_sc_vsi *vsi = &iwdev->vsi; enum i40iw_status_code status; struct i40iw_cqp *iwcqp = &iwdev->cqp; struct i40iw_cqp_request *cqp_request; @@ -550,6 +555,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, info = &cqp_info->in.u.manage_qhash_table_entry.info; memset(info, 0, sizeof(*info)); + info->vsi = &iwdev->vsi; info->manage = mtype; info->entry_type = etype; if (cminfo->vlan_id != 0xFFFF) { @@ -560,8 +566,9 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, } info->ipv4_valid = cminfo->ipv4; + info->user_pri = cminfo->user_pri; ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr); - info->qp_num = cpu_to_le32(dev->ilq->qp_id); + info->qp_num = cpu_to_le32(vsi->ilq->qp_id); info->dest_port = cpu_to_le16(cminfo->loc_port); info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]); info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]); @@ -617,6 +624,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp_flush_info *hw_info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; + struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait); if (!cqp_request) @@ -631,9 +639,30 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, cqp_info->in.u.qp_flush_wqes.qp = qp; cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request; status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (status) + if (status) { i40iw_pr_err("CQP-OP Flush WQE's fail"); - return status; + complete(&iwqp->sq_drained); + complete(&iwqp->rq_drained); + return status; + } + if (!cqp_request->compl_info.maj_err_code) { + switch (cqp_request->compl_info.min_err_code) { + case I40IW_CQP_COMPL_RQ_WQE_FLUSHED: + complete(&iwqp->sq_drained); + break; + case I40IW_CQP_COMPL_SQ_WQE_FLUSHED: + complete(&iwqp->rq_drained); + break; + case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED: + break; + default: + complete(&iwqp->sq_drained); + complete(&iwqp->rq_drained); + break; + } + } + + return 0; } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index ac2f3cd9478c..2728af3103ce 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -237,14 +237,11 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) */ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) { - enum i40iw_status_code status = 0; struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_cqp *cqp = &iwdev->cqp; - if (free_hwcqp && dev->cqp_ops->cqp_destroy) - status = dev->cqp_ops->cqp_destroy(dev->cqp); - if (status) - i40iw_pr_err("destroy cqp failed"); + if (free_hwcqp) + dev->cqp_ops->cqp_destroy(dev->cqp); i40iw_free_dma_mem(dev->hw, &cqp->sq); kfree(cqp->scratch_array); @@ -270,6 +267,7 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); else i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); + irq_set_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); } @@ -603,7 +601,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev) i40iw_pr_err("cqp init status %d\n", status); goto exit; } - status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err); + status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err); if (status) { i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n", status, maj_err, min_err); @@ -688,6 +686,7 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw struct i40iw_msix_vector *msix_vec) { enum i40iw_status_code status; + cpumask_t mask; if (iwdev->msix_shared && !ceq_id) { tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); @@ -697,12 +696,15 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } + cpumask_clear(&mask); + cpumask_set_cpu(msix_vec->cpu_affinity, &mask); + irq_set_affinity_hint(msix_vec->irq, &mask); + if (status) { i40iw_pr_err("ceq irq config fail\n"); return I40IW_ERR_CONFIG; } msix_vec->ceq_id = ceq_id; - msix_vec->cpu_affinity = 0; return 0; } @@ -930,6 +932,7 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev) struct i40iw_puda_rsrc_info info; enum i40iw_status_code status; + memset(&info, 0, sizeof(info)); info.type = I40IW_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; info.qp_id = 0; @@ -939,10 +942,9 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev) info.rq_size = 8192; info.buf_size = 1024; info.tx_buf_cnt = 16384; - info.mss = iwdev->mss; info.receive = i40iw_receive_ilq; info.xmit_complete = i40iw_free_sqbuf; - status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info); + status = i40iw_puda_create_rsrc(&iwdev->vsi, &info); if (status) i40iw_pr_err("ilq create fail\n"); return status; @@ -959,6 +961,7 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) struct i40iw_puda_rsrc_info info; enum i40iw_status_code status; + memset(&info, 0, sizeof(info)); info.type = I40IW_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; info.qp_id = iwdev->sc_dev.exception_lan_queue; @@ -967,9 +970,8 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) info.sq_size = 8192; info.rq_size = 8192; info.buf_size = 2048; - info.mss = iwdev->mss; info.tx_buf_cnt = 16384; - status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info); + status = i40iw_puda_create_rsrc(&iwdev->vsi, &info); if (status) i40iw_pr_err("ieq create fail\n"); return status; @@ -1159,7 +1161,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev) { struct net_device *ip_dev; struct inet6_dev *idev; - struct inet6_ifaddr *ifp; + struct inet6_ifaddr *ifp, *tmp; u32 local_ipaddr6[4]; rcu_read_lock(); @@ -1172,7 +1174,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev) i40iw_pr_err("ipv6 inet device not found\n"); break; } - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr, rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr); i40iw_copy_ip_ntohl(local_ipaddr6, @@ -1294,17 +1296,23 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, enum i40iw_status_code status; struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_device_init_info info; + struct i40iw_vsi_init_info vsi_info; struct i40iw_dma_mem mem; + struct i40iw_l2params l2params; u32 size; + struct i40iw_vsi_stats_info stats_info; + u16 last_qset = I40IW_NO_QSET; + u16 qset; + u32 i; + memset(&l2params, 0, sizeof(l2params)); memset(&info, 0, sizeof(info)); size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) + (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX); iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL); - if (!iwdev->hmc_info_mem) { - i40iw_pr_err("memory alloc fail\n"); + if (!iwdev->hmc_info_mem) return I40IW_ERR_NO_MEMORY; - } + iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem; dev->hmc_info = &iwdev->hw.hmc; dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1); @@ -1325,7 +1333,17 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, info.bar0 = ldev->hw_addr; info.hw = &iwdev->hw; info.debug_mask = debug; - info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle; + l2params.mss = + (ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS; + for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) { + qset = ldev->params.qos.prio_qos[i].qs_handle; + l2params.qs_handle_list[i] = qset; + if (last_qset == I40IW_NO_QSET) + last_qset = qset; + else if ((qset != last_qset) && (qset != I40IW_NO_QSET)) + iwdev->dcb = true; + } + i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb); info.exception_lan_queue = 1; info.vchnl_send = i40iw_virtchnl_send; status = i40iw_device_init(&iwdev->sc_dev, &info); @@ -1334,6 +1352,20 @@ exit: kfree(iwdev->hmc_info_mem); iwdev->hmc_info_mem = NULL; } + memset(&vsi_info, 0, sizeof(vsi_info)); + vsi_info.dev = &iwdev->sc_dev; + vsi_info.back_vsi = (void *)iwdev; + vsi_info.params = &l2params; + i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info); + + if (dev->is_pf) { + memset(&stats_info, 0, sizeof(stats_info)); + stats_info.fcn_id = ldev->fid; + stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); + stats_info.stats_initialize = true; + if (stats_info.pestat) + i40iw_vsi_stats_init(&iwdev->vsi, &stats_info); + } return status; } @@ -1384,6 +1416,7 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) { iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry; iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector; + iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx; if (i == 0) { iw_qvinfo->aeq_idx = 0; if (iwdev->msix_shared) @@ -1404,18 +1437,19 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, * i40iw_deinit_device - clean up the device resources * @iwdev: iwarp device * @reset: true if called before reset - * @del_hdl: true if delete hdl entry * * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses, * destroy the device queues and free the pble and the hmc objects */ -static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl) +static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) { struct i40e_info *ldev = iwdev->ldev; struct i40iw_sc_dev *dev = &iwdev->sc_dev; i40iw_pr_info("state = %d\n", iwdev->init_state); + if (iwdev->param_wq) + destroy_workqueue(iwdev->param_wq); switch (iwdev->init_state) { case RDMA_DEV_REGISTERED: @@ -1441,10 +1475,10 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_destroy_aeq(iwdev, reset); /* fallthrough */ case IEQ_CREATED: - i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset); /* fallthrough */ case ILQ_CREATED: - i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset); /* fallthrough */ case CCQ_CREATED: i40iw_destroy_ccq(iwdev, reset); @@ -1456,13 +1490,14 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset); /* fallthrough */ case CQP_CREATED: - i40iw_destroy_cqp(iwdev, !reset); + i40iw_destroy_cqp(iwdev, true); /* fallthrough */ case INITIAL_STATE: i40iw_cleanup_cm_core(&iwdev->cm_core); - if (dev->is_pf) - i40iw_hw_stats_del_timer(dev); - + if (iwdev->vsi.pestat) { + i40iw_vsi_stats_free(&iwdev->vsi); + kfree(iwdev->vsi.pestat); + } i40iw_del_init_mem(iwdev); break; case INVALID_STATE: @@ -1472,8 +1507,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del break; } - if (del_hdl) - i40iw_del_handler(i40iw_find_i40e_handler(ldev)); + i40iw_del_handler(i40iw_find_i40e_handler(ldev)); kfree(iwdev->hdl); } @@ -1508,7 +1542,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, iwdev->max_enabled_vfs = iwdev->max_rdma_vfs; iwdev->netdev = ldev->netdev; hdl->client = client; - iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS; if (!ldev->ftype) iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET; else @@ -1528,6 +1561,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, init_waitqueue_head(&iwdev->vchnl_waitq); init_waitqueue_head(&dev->vf_reqs); + init_waitqueue_head(&iwdev->close_wq); status = i40iw_initialize_dev(iwdev, ldev); exit: @@ -1540,6 +1574,20 @@ exit: } /** + * i40iw_get_used_rsrc - determine resources used internally + * @iwdev: iwarp device + * + * Called after internal allocations + */ +static void i40iw_get_used_rsrc(struct i40iw_device *iwdev) +{ + iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0); + iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0); + iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0); + iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0); +} + +/** * i40iw_open - client interface operation open for iwarp/uda device * @ldev: lan device information * @client: iwarp client information, provided during registration @@ -1611,6 +1659,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_initialize_hw_resources(iwdev); if (status) break; + i40iw_get_used_rsrc(iwdev); dev->ccq_ops->ccq_arm(dev->ccq); status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) @@ -1630,35 +1679,73 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) iwdev->init_state = RDMA_DEV_REGISTERED; iwdev->iw_status = 1; i40iw_port_ibevent(iwdev); + iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM); + if(iwdev->param_wq == NULL) + break; i40iw_pr_info("i40iw_open completed\n"); return 0; } while (0); i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state); - i40iw_deinit_device(iwdev, false, false); + i40iw_deinit_device(iwdev, false); return -ERESTART; } /** - * i40iw_l2param_change : handle qs handles for qos and mss change + * i40iw_l2params_worker - worker for l2 params change + * @work: work pointer for l2 params + */ +static void i40iw_l2params_worker(struct work_struct *work) +{ + struct l2params_work *dwork = + container_of(work, struct l2params_work, work); + struct i40iw_device *iwdev = dwork->iwdev; + + i40iw_change_l2params(&iwdev->vsi, &dwork->l2params); + atomic_dec(&iwdev->params_busy); + kfree(work); +} + +/** + * i40iw_l2param_change - handle qs handles for qos and mss change * @ldev: lan device information * @client: client for paramater change * @params: new parameters from L2 */ -static void i40iw_l2param_change(struct i40e_info *ldev, - struct i40e_client *client, +static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client, struct i40e_params *params) { struct i40iw_handler *hdl; + struct i40iw_l2params *l2params; + struct l2params_work *work; struct i40iw_device *iwdev; + int i; hdl = i40iw_find_i40e_handler(ldev); if (!hdl) return; iwdev = &hdl->device; - if (params->mtu) - iwdev->mss = params->mtu - I40IW_MTU_TO_MSS; + + if (atomic_read(&iwdev->params_busy)) + return; + + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + atomic_inc(&iwdev->params_busy); + + work->iwdev = iwdev; + l2params = &work->l2params; + for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) + l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle; + + l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss; + + INIT_WORK(&work->work, i40iw_l2params_worker); + queue_work(iwdev->param_wq, &work->work); } /** @@ -1679,8 +1766,11 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool return; iwdev = &hdl->device; + iwdev->closing = true; + + i40iw_cm_disconnect_all(iwdev); destroy_workqueue(iwdev->virtchnl_wq); - i40iw_deinit_device(iwdev, reset, true); + i40iw_deinit_device(iwdev, reset); } /** @@ -1701,21 +1791,23 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u struct i40iw_vfdev *tmp_vfdev; unsigned int i; unsigned long flags; + struct i40iw_device *iwdev; hdl = i40iw_find_i40e_handler(ldev); if (!hdl) return; dev = &hdl->device.sc_dev; + iwdev = (struct i40iw_device *)dev->back_dev; for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) { if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id)) continue; /* free all resources allocated on behalf of vf */ tmp_vfdev = dev->vf_dev[i]; - spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); + spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags); dev->vf_dev[i] = NULL; - spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); + spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags); i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false); /* remove vf hmc function */ memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info)); diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index 80f422bf3967..aa66c1c63dfa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -198,6 +198,8 @@ enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev, void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx); void *i40iw_remove_head(struct list_head *list); +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend); +void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len); void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred); @@ -207,9 +209,9 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp); enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev, struct i40iw_manage_vf_pble_info *info, bool wait); -struct i40iw_dev_pestat; -void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *); -void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *); +struct i40iw_sc_vsi; +void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi); +void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi); #define i40iw_mmiowb() mmiowb() void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value); u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg); diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index a0b8ca10d67e..28a92fee0822 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -47,8 +47,6 @@ void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask, enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, struct i40iw_device_init_info *info); -enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *); - void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp); u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch); @@ -64,7 +62,24 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id, u32 *vf_cnt_array); -/* cqp misc functions */ +/* stats functions */ +void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats); +void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values); +void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_32b index, + u64 *value); +void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_64b index, + u64 *value); +void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf); + +/* vsi misc functions */ +enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info); +void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi); +void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info); + +void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params); +void i40iw_qp_add_qos(struct i40iw_sc_qp *qp); void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp); diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 85993dc44f6e..c87ba1617087 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -353,10 +353,6 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev, pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD - idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD; pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT); - if (!pages) { - ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE; - goto error; - } info.chunk = chunk; info.hmc_info = hmc_info; info.pages = pages; diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index c62d354f7810..449ba8c81ce7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -42,12 +42,13 @@ #include "i40iw_p.h" #include "i40iw_puda.h" -static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, +static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *buf); -static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid); +static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid); static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx); static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc, bool initial); +static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp); /** * i40iw_puda_get_listbuf - get buffer from puda list * @list: list to use for buffers (ILQ or IEQ) @@ -292,7 +293,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, unsigned long flags; if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) { - rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq; + rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq; } else { i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__); return I40IW_ERR_BAD_PTR; @@ -335,7 +336,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, rsrc->stats_pkt_rcvd++; rsrc->compl_rxwqe_idx = info.wqe_idx; i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__); - rsrc->receive(rsrc->dev, buf); + rsrc->receive(rsrc->vsi, buf); if (cq_type == I40IW_CQ_TYPE_ILQ) i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx); else @@ -345,12 +346,12 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__); sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid; I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx); - rsrc->xmit_complete(rsrc->dev, sqwrid); + rsrc->xmit_complete(rsrc->vsi, sqwrid); spin_lock_irqsave(&rsrc->bufpool_lock, flags); rsrc->tx_wqe_avail_cnt++; spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); - if (!list_empty(&dev->ilq->txpend)) - i40iw_puda_send_buf(dev->ilq, NULL); + if (!list_empty(&rsrc->vsi->ilq->txpend)) + i40iw_puda_send_buf(rsrc->vsi->ilq, NULL); } done: @@ -513,10 +514,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) * i40iw_puda_qp_wqe - setup wqe for qp create * @rsrc: resource for qp */ -static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc) +static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { - struct i40iw_sc_qp *qp = &rsrc->qp; - struct i40iw_sc_dev *dev = rsrc->dev; struct i40iw_sc_cqp *cqp; u64 *wqe; u64 header; @@ -582,6 +581,7 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) qp->back_qp = (void *)rsrc; qp->sq_pa = mem->pa; qp->rq_pa = qp->sq_pa + sq_size; + qp->vsi = rsrc->vsi; ukqp->sq_base = mem->va; ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size]; ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem; @@ -608,15 +608,63 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) + I40E_VFPE_WQEALLOC1); - qp->qs_handle = qp->dev->qs_handle; + qp->user_pri = 0; + i40iw_qp_add_qos(qp); i40iw_puda_qp_setctx(rsrc); - ret = i40iw_puda_qp_wqe(rsrc); + if (rsrc->ceq_valid) + ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp); + else + ret = i40iw_puda_qp_wqe(rsrc->dev, qp); if (ret) i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); return ret; } /** + * i40iw_puda_cq_wqe - setup wqe for cq create + * @rsrc: resource for cq + */ +static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) +{ + u64 *wqe; + struct i40iw_sc_cqp *cqp; + u64 header; + struct i40iw_ccq_cqe_info compl_info; + enum i40iw_status_code status = 0; + + cqp = dev->cqp; + wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0); + if (!wqe) + return I40IW_ERR_RING_FULL; + + set_64bit_val(wqe, 0, cq->cq_uk.cq_size); + set_64bit_val(wqe, 8, RS_64_1(cq, 1)); + set_64bit_val(wqe, 16, + LS_64(cq->shadow_read_threshold, + I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD)); + set_64bit_val(wqe, 32, cq->cq_pa); + + set_64bit_val(wqe, 40, cq->shadow_area_pa); + + header = cq->cq_uk.cq_id | + LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) | + LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) | + LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | + LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | + LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); + set_64bit_val(wqe, 24, header); + + i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", + wqe, I40IW_CQP_WQE_SIZE * 8); + + i40iw_sc_cqp_post_sq(dev->cqp); + status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_CREATE_CQ, + &compl_info); + return status; +} + +/** * i40iw_puda_cq_create - create cq for resource * @rsrc: resource for which cq to create */ @@ -624,18 +672,13 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) { struct i40iw_sc_dev *dev = rsrc->dev; struct i40iw_sc_cq *cq = &rsrc->cq; - u64 *wqe; - struct i40iw_sc_cqp *cqp; - u64 header; enum i40iw_status_code ret = 0; u32 tsize, cqsize; - u32 shadow_read_threshold = 128; struct i40iw_dma_mem *mem; - struct i40iw_ccq_cqe_info compl_info; struct i40iw_cq_init_info info; struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info; - cq->back_cq = (void *)rsrc; + cq->vsi = rsrc->vsi; cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe)); tsize = cqsize + sizeof(struct i40iw_cq_shadow_area); ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize, @@ -656,43 +699,84 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize); init_info->cq_size = rsrc->cq_size; init_info->cq_id = rsrc->cq_id; + info.ceqe_mask = true; + info.ceq_id_valid = true; ret = dev->iw_priv_cq_ops->cq_init(cq, &info); if (ret) goto error; - cqp = dev->cqp; - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0); - if (!wqe) { - ret = I40IW_ERR_RING_FULL; - goto error; - } + if (rsrc->ceq_valid) + ret = i40iw_cqp_cq_create_cmd(dev, cq); + else + ret = i40iw_puda_cq_wqe(dev, cq); +error: + if (ret) + i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); + return ret; +} - set_64bit_val(wqe, 0, rsrc->cq_size); - set_64bit_val(wqe, 8, RS_64_1(cq, 1)); - set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD)); - set_64bit_val(wqe, 32, cq->cq_pa); +/** + * i40iw_puda_free_qp - free qp for resource + * @rsrc: resource for which qp to free + */ +static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc) +{ + enum i40iw_status_code ret; + struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = rsrc->dev; - set_64bit_val(wqe, 40, cq->shadow_area_pa); + if (rsrc->ceq_valid) { + i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp); + return; + } - header = rsrc->cq_id | - LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) | - LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) | - LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | - LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp, + 0, false, true, true); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error puda qp destroy wqe\n", + __func__); - i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", - wqe, I40IW_CQP_WQE_SIZE * 8); + if (!ret) { + ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_DESTROY_QP, + &compl_info); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error puda qp destroy failed\n", + __func__); + } +} - i40iw_sc_cqp_post_sq(dev->cqp); - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_CREATE_CQ, - &compl_info); +/** + * i40iw_puda_free_cq - free cq for resource + * @rsrc: resource for which cq to free + */ +static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) +{ + enum i40iw_status_code ret; + struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = rsrc->dev; + + if (rsrc->ceq_valid) { + i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq); + return; + } + ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true); -error: if (ret) - i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); - return ret; + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error ieq cq destroy\n", + __func__); + + if (!ret) { + ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_DESTROY_CQ, + &compl_info); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error ieq qp destroy done\n", + __func__); + } } /** @@ -701,25 +785,24 @@ error: * @type: type of resource to dele * @reset: true if reset chip */ -void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, +void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi, enum puda_resource_type type, bool reset) { - struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_puda_rsrc *rsrc; struct i40iw_puda_buf *buf = NULL; struct i40iw_puda_buf *nextbuf = NULL; struct i40iw_virt_mem *vmem; - enum i40iw_status_code ret; switch (type) { case I40IW_PUDA_RSRC_TYPE_ILQ: - rsrc = dev->ilq; - vmem = &dev->ilq_mem; + rsrc = vsi->ilq; + vmem = &vsi->ilq_mem; break; case I40IW_PUDA_RSRC_TYPE_IEQ: - rsrc = dev->ieq; - vmem = &dev->ieq_mem; + rsrc = vsi->ieq; + vmem = &vsi->ieq_mem; break; default: i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n", @@ -731,45 +814,14 @@ void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, case PUDA_HASH_CRC_COMPLETE: i40iw_free_hash_desc(rsrc->hash_desc); case PUDA_QP_CREATED: - do { - if (reset) - break; - ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp, - 0, false, true, true); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy\n", - __func__); - - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_DESTROY_QP, - &compl_info); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy done\n", - __func__); - } while (0); + if (!reset) + i40iw_puda_free_qp(rsrc); i40iw_free_dma_mem(dev->hw, &rsrc->qpmem); /* fallthrough */ case PUDA_CQ_CREATED: - do { - if (reset) - break; - ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq cq destroy\n", - __func__); - - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_DESTROY_CQ, - &compl_info); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy done\n", - __func__); - } while (0); + if (!reset) + i40iw_puda_free_cq(rsrc); i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); break; @@ -825,9 +877,10 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, * @dev: iwarp device * @info: resource information */ -enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, +enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, struct i40iw_puda_rsrc_info *info) { + struct i40iw_sc_dev *dev = vsi->dev; enum i40iw_status_code ret = 0; struct i40iw_puda_rsrc *rsrc; u32 pudasize; @@ -840,10 +893,10 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rqwridsize = info->rq_size * 8; switch (info->type) { case I40IW_PUDA_RSRC_TYPE_ILQ: - vmem = &dev->ilq_mem; + vmem = &vsi->ilq_mem; break; case I40IW_PUDA_RSRC_TYPE_IEQ: - vmem = &dev->ieq_mem; + vmem = &vsi->ieq_mem; break; default: return I40IW_NOT_SUPPORTED; @@ -856,22 +909,22 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rsrc = (struct i40iw_puda_rsrc *)vmem->va; spin_lock_init(&rsrc->bufpool_lock); if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) { - dev->ilq = (struct i40iw_puda_rsrc *)vmem->va; - dev->ilq_count = info->count; + vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va; + vsi->ilq_count = info->count; rsrc->receive = info->receive; rsrc->xmit_complete = info->xmit_complete; } else { - vmem = &dev->ieq_mem; - dev->ieq_count = info->count; - dev->ieq = (struct i40iw_puda_rsrc *)vmem->va; + vmem = &vsi->ieq_mem; + vsi->ieq_count = info->count; + vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va; rsrc->receive = i40iw_ieq_receive; rsrc->xmit_complete = i40iw_ieq_tx_compl; } + rsrc->ceq_valid = info->ceq_valid; rsrc->type = info->type; rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize); rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize); - rsrc->mss = info->mss; /* Initialize all ieq lists */ INIT_LIST_HEAD(&rsrc->bufpool); INIT_LIST_HEAD(&rsrc->txpend); @@ -885,6 +938,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rsrc->cq_size = info->rq_size + info->sq_size; rsrc->buf_size = info->buf_size; rsrc->dev = dev; + rsrc->vsi = vsi; ret = i40iw_puda_cq_create(rsrc); if (!ret) { @@ -919,7 +973,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, dev->ccq_ops->ccq_arm(&rsrc->cq); return ret; error: - i40iw_puda_dele_resources(dev, info->type, false); + i40iw_puda_dele_resources(vsi, info->type, false); return ret; } @@ -1131,7 +1185,7 @@ static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *i list_add(&buf->list, &pbufl); status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len); - if (!status) + if (status) goto error; txbuf = i40iw_puda_get_bufpool(ieq); @@ -1332,7 +1386,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, } if (pfpdu->mode && (fps != pfpdu->fps)) { /* clean up qp as it is new partial sequence */ - i40iw_ieq_cleanup_qp(ieq->dev, qp); + i40iw_ieq_cleanup_qp(ieq, qp); i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ, "%s: restarting new partial\n", __func__); pfpdu->mode = false; @@ -1344,7 +1398,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, pfpdu->rcv_nxt = fps; pfpdu->fps = fps; pfpdu->mode = true; - pfpdu->max_fpdu_data = ieq->mss; + pfpdu->max_fpdu_data = ieq->vsi->mss; pfpdu->pmode_count++; INIT_LIST_HEAD(rxlist); i40iw_ieq_check_first_buf(buf, fps); @@ -1379,14 +1433,14 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, * @dev: iwarp device * @buf: exception buffer received */ -static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, +static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *buf) { - struct i40iw_puda_rsrc *ieq = dev->ieq; + struct i40iw_puda_rsrc *ieq = vsi->ieq; struct i40iw_sc_qp *qp = NULL; u32 wqe_idx = ieq->compl_rxwqe_idx; - qp = i40iw_ieq_get_qp(dev, buf); + qp = i40iw_ieq_get_qp(vsi->dev, buf); if (!qp) { ieq->stats_bad_qp_id++; i40iw_puda_ret_bufpool(ieq, buf); @@ -1404,12 +1458,12 @@ static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, /** * i40iw_ieq_tx_compl - put back after sending completed exception buffer - * @dev: iwarp device + * @vsi: pointer to the vsi structure * @sqwrid: pointer to puda buffer */ -static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid) +static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid) { - struct i40iw_puda_rsrc *ieq = dev->ieq; + struct i40iw_puda_rsrc *ieq = vsi->ieq; struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid; i40iw_puda_ret_bufpool(ieq, buf); @@ -1421,15 +1475,14 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid) /** * i40iw_ieq_cleanup_qp - qp is being destroyed - * @dev: iwarp device + * @ieq: ieq resource * @qp: all pending fpdu buffers */ -void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp) { struct i40iw_puda_buf *buf; struct i40iw_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; - struct i40iw_puda_rsrc *ieq = dev->ieq; if (!pfpdu->mode) return; diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h index 52bf7826ce4e..dba05ce7d392 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.h +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h @@ -100,6 +100,7 @@ struct i40iw_puda_rsrc_info { enum puda_resource_type type; /* ILQ or IEQ */ u32 count; u16 pd_id; + bool ceq_valid; u32 cq_id; u32 qp_id; u32 sq_size; @@ -107,8 +108,8 @@ struct i40iw_puda_rsrc_info { u16 buf_size; u16 mss; u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */ - void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *); - void (*xmit_complete)(struct i40iw_sc_dev *, void *); + void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *); + void (*xmit_complete)(struct i40iw_sc_vsi *, void *); }; struct i40iw_puda_rsrc { @@ -116,6 +117,7 @@ struct i40iw_puda_rsrc { struct i40iw_sc_qp qp; struct i40iw_sc_pd sc_pd; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; struct i40iw_dma_mem cqmem; struct i40iw_dma_mem qpmem; struct i40iw_virt_mem ilq_mem; @@ -123,6 +125,7 @@ struct i40iw_puda_rsrc { enum puda_resource_type type; u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */ u16 mss; + bool ceq_valid; u32 cq_id; u32 qp_id; u32 sq_size; @@ -142,8 +145,8 @@ struct i40iw_puda_rsrc { u32 avail_buf_count; /* snapshot of currently available buffers */ spinlock_t bufpool_lock; struct i40iw_puda_buf *alloclist; - void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *); - void (*xmit_complete)(struct i40iw_sc_dev *, void *); + void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *); + void (*xmit_complete)(struct i40iw_sc_vsi *, void *); /* puda stats */ u64 stats_buf_alloc_fail; u64 stats_pkt_rcvd; @@ -160,14 +163,13 @@ void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf); enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp, struct i40iw_puda_send_info *info); -enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, +enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, struct i40iw_puda_rsrc_info *info); -void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, +void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi, enum puda_resource_type type, bool reset); enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq, u32 *compl_err); -void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf); @@ -180,4 +182,8 @@ void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_free_hash_desc(struct shash_desc *desc); void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum); +enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); +enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); +void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); +void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 2b1a04e9ca3c..f3f8e9cc3c05 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -61,7 +61,7 @@ struct i40iw_cq_shadow_area { struct i40iw_sc_dev; struct i40iw_hmc_info; -struct i40iw_dev_pestat; +struct i40iw_vsi_pestat; struct i40iw_cqp_ops; struct i40iw_ccq_ops; @@ -74,6 +74,11 @@ struct i40iw_priv_qp_ops; struct i40iw_priv_cq_ops; struct i40iw_hmc_ops; +enum i40iw_page_size { + I40IW_PAGE_SIZE_4K, + I40IW_PAGE_SIZE_2M +}; + enum i40iw_resource_indicator_type { I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0, I40IW_RSRC_INDICATOR_TYPE_CQ, @@ -186,7 +191,7 @@ enum i40iw_debug_flag { I40IW_DEBUG_ALL = 0xFFFFFFFF }; -enum i40iw_hw_stat_index_32b { +enum i40iw_hw_stats_index_32b { I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0, I40IW_HW_STAT_INDEX_IP4RXTRUNC, I40IW_HW_STAT_INDEX_IP4TXNOROUTE, @@ -199,7 +204,7 @@ enum i40iw_hw_stat_index_32b { I40IW_HW_STAT_INDEX_MAX_32 }; -enum i40iw_hw_stat_index_64b { +enum i40iw_hw_stats_index_64b { I40IW_HW_STAT_INDEX_IP4RXOCTS = 0, I40IW_HW_STAT_INDEX_IP4RXPKTS, I40IW_HW_STAT_INDEX_IP4RXFRAGS, @@ -229,32 +234,23 @@ enum i40iw_hw_stat_index_64b { I40IW_HW_STAT_INDEX_MAX_64 }; -struct i40iw_dev_hw_stat_offsets { - u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32]; - u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64]; +struct i40iw_dev_hw_stats_offsets { + u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32]; + u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64]; }; struct i40iw_dev_hw_stats { - u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32]; - u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64]; -}; - -struct i40iw_device_pestat_ops { - void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool); - void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *); - void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *); - void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *); - void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *); + u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32]; + u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64]; }; -struct i40iw_dev_pestat { +struct i40iw_vsi_pestat { struct i40iw_hw *hw; - struct i40iw_device_pestat_ops ops; struct i40iw_dev_hw_stats hw_stats; struct i40iw_dev_hw_stats last_read_hw_stats; - struct i40iw_dev_hw_stat_offsets hw_stat_offsets; + struct i40iw_dev_hw_stats_offsets hw_stats_offsets; struct timer_list stats_timer; - spinlock_t stats_lock; /* rdma stats lock */ + spinlock_t lock; /* rdma stats lock */ }; struct i40iw_hw { @@ -350,6 +346,7 @@ struct i40iw_sc_cq { u64 cq_pa; u64 shadow_area_pa; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; void *pbl_list; void *back_cq; u32 ceq_id; @@ -373,6 +370,7 @@ struct i40iw_sc_qp { u64 shadow_area_pa; u64 q2_pa; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; struct i40iw_sc_pd *pd; u64 *hw_host_ctx; void *llp_stream_handle; @@ -397,6 +395,9 @@ struct i40iw_sc_qp { bool virtual_map; bool flush_sq; bool flush_rq; + u8 user_pri; + struct list_head list; + bool on_qoslist; bool sq_flush; enum i40iw_flush_opcode flush_code; enum i40iw_term_eventtypes eventtype; @@ -424,10 +425,16 @@ struct i40iw_vchnl_vf_msg_buffer { char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1]; }; +struct i40iw_qos { + struct list_head qplist; + spinlock_t lock; /* qos list */ + u16 qs_handle; +}; + struct i40iw_vfdev { struct i40iw_sc_dev *pf_dev; u8 *hmc_info_mem; - struct i40iw_dev_pestat dev_pestat; + struct i40iw_vsi_pestat pestat; struct i40iw_hmc_pble_info *pble_info; struct i40iw_hmc_info hmc_info; struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer; @@ -441,11 +448,28 @@ struct i40iw_vfdev { bool stats_initialized; }; +#define I40IW_INVALID_FCN_ID 0xff +struct i40iw_sc_vsi { + struct i40iw_sc_dev *dev; + void *back_vsi; /* Owned by OS */ + u32 ilq_count; + struct i40iw_virt_mem ilq_mem; + struct i40iw_puda_rsrc *ilq; + u32 ieq_count; + struct i40iw_virt_mem ieq_mem; + struct i40iw_puda_rsrc *ieq; + u16 mss; + u8 fcn_id; + bool stats_fcn_id_alloc; + struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY]; + struct i40iw_vsi_pestat *pestat; +}; + struct i40iw_sc_dev { struct list_head cqp_cmd_head; /* head of the CQP command list */ spinlock_t cqp_lock; /* cqp list sync */ struct i40iw_dev_uk dev_uk; - struct i40iw_dev_pestat dev_pestat; + bool fcn_id_array[I40IW_MAX_STATS_COUNT]; struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT]; u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; @@ -472,17 +496,9 @@ struct i40iw_sc_dev { struct i40iw_cqp_misc_ops *cqp_misc_ops; struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; - u32 ilq_count; - struct i40iw_virt_mem ilq_mem; - struct i40iw_puda_rsrc *ilq; - u32 ieq_count; - struct i40iw_virt_mem ieq_mem; - struct i40iw_puda_rsrc *ieq; - const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; - u16 qs_handle; u32 debug_mask; u16 exception_lan_queue; u8 hmc_fn_id; @@ -556,6 +572,19 @@ struct i40iw_l2params { u16 mss; }; +struct i40iw_vsi_init_info { + struct i40iw_sc_dev *dev; + void *back_vsi; + struct i40iw_l2params *params; +}; + +struct i40iw_vsi_stats_info { + struct i40iw_vsi_pestat *pestat; + u8 fcn_id; + bool alloc_fcn_id; + bool stats_initialize; +}; + struct i40iw_device_init_info { u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; @@ -564,7 +593,6 @@ struct i40iw_device_init_info { struct i40iw_hw *hw; void __iomem *bar0; enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16); - u16 qs_handle; u16 exception_lan_queue; u8 hmc_fn_id; bool is_pf; @@ -722,6 +750,8 @@ struct i40iw_qp_host_ctx_info { bool iwarp_info_valid; bool err_rq_idx_valid; u16 err_rq_idx; + bool add_to_qoslist; + u8 user_pri; }; struct i40iw_aeqe_info { @@ -814,6 +844,7 @@ struct i40iw_register_shared_stag { struct i40iw_qp_init_info { struct i40iw_qp_uk_init_info qp_uk_init_info; struct i40iw_sc_pd *pd; + struct i40iw_sc_vsi *vsi; u64 *host_ctx; u8 *q2; u64 sq_pa; @@ -880,13 +911,14 @@ enum i40iw_quad_hash_manage_type { }; struct i40iw_qhash_table_info { + struct i40iw_sc_vsi *vsi; enum i40iw_quad_hash_manage_type manage; enum i40iw_quad_entry_type entry_type; bool vlan_valid; bool ipv4_valid; u8 mac_addr[6]; u16 vlan_id; - u16 qs_handle; + u8 user_pri; u32 qp_num; u32 dest_ip[4]; u32 src_ip[4]; @@ -976,7 +1008,7 @@ struct i40iw_cqp_query_fpm_values { struct i40iw_cqp_ops { enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *, struct i40iw_cqp_init_info *); - enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *); + enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *); void (*cqp_post_sq)(struct i40iw_sc_cqp *); u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch); enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *); diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 4d28c3cb03cc..4376cd628774 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -175,12 +175,10 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; } - - for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) { - I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); - if (ret_code) - return NULL; - } + I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring, + wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code); + if (ret_code) + return NULL; wqe = qp->sq_base[*wqe_idx].elem; @@ -430,7 +428,7 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, struct i40iw_inline_rdma_write *op_info; u64 *push; u64 header = 0; - u32 i, wqe_idx; + u32 wqe_idx; enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; @@ -465,14 +463,12 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, src = (u8 *)(op_info->data); if (op_info->len <= 16) { - for (i = 0; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len); } else { - for (i = 0; i < 16; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, 16); + src += 16; dest = (u8 *)wqe + 32; - for (; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len - 16); } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -507,7 +503,7 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, u8 *dest, *src; struct i40iw_post_inline_send *op_info; u64 header; - u32 wqe_idx, i; + u32 wqe_idx; enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; @@ -540,14 +536,12 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, src = (u8 *)(op_info->data); if (op_info->len <= 16) { - for (i = 0; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len); } else { - for (i = 0; i < 16; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, 16); + src += 16; dest = (u8 *)wqe + 32; - for (; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len - 16); } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -1190,12 +1184,8 @@ enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, if (data_size <= 16) *wqe_size = I40IW_QP_WQE_MIN_SIZE; - else if (data_size <= 48) - *wqe_size = 64; - else if (data_size <= 80) - *wqe_size = 96; else - *wqe_size = 128; + *wqe_size = 64; return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 276bcefffd7e..80d9f464f65e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -72,12 +72,12 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, - I40IW_MAX_IRD_SIZE = 32, - I40IW_QPCTX_ENCD_MAXIRD = 3, + I40IW_MAX_IRD_SIZE = 63, + I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, - I40IW_MAX_ORD_SIZE = 32, I40IW_Q2_BUFFER_SIZE = (248 + 100), - I40IW_QP_CTX_SIZE = 248 + I40IW_QP_CTX_SIZE = 248, + I40IW_MAX_PDS = 32768 }; #define i40iw_handle void * @@ -96,12 +96,6 @@ enum i40iw_device_capabilities_const { #define i40iw_physical_fragment u64 #define i40iw_address_list u64 * -#define I40IW_CREATE_STAG(index, key) (((index) << 8) + (key)) - -#define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF) - -#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8) - #define I40IW_MAX_MR_SIZE 0x10000000000L struct i40iw_qp_uk; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 6fd043b1d714..0f5d43d1f5fc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -153,6 +153,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, struct i40iw_device *iwdev; struct i40iw_handler *hdl; u32 local_ipaddr; + u32 action = I40IW_ARP_ADD; hdl = i40iw_find_netdev(event_netdev); if (!hdl) @@ -164,44 +165,25 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, if (netdev != event_netdev) return NOTIFY_DONE; + if (upper_dev) + local_ipaddr = ntohl( + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); + else + local_ipaddr = ntohl(ifa->ifa_address); switch (event) { case NETDEV_DOWN: - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - &local_ipaddr, - true, - I40IW_ARP_DELETE); - return NOTIFY_OK; + action = I40IW_ARP_DELETE; + /* Fall through */ case NETDEV_UP: - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - &local_ipaddr, - true, - I40IW_ARP_ADD); - break; + /* Fall through */ case NETDEV_CHANGEADDR: - /* Add the address to the IP table */ - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, true, - I40IW_ARP_ADD); + action); + i40iw_if_notify(iwdev, netdev, &local_ipaddr, true, + (action == I40IW_ARP_ADD) ? true : false); break; default: break; @@ -225,6 +207,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, struct i40iw_device *iwdev; struct i40iw_handler *hdl; u32 local_ipaddr6[4]; + u32 action = I40IW_ARP_ADD; hdl = i40iw_find_netdev(event_netdev); if (!hdl) @@ -235,24 +218,21 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, if (netdev != event_netdev) return NOTIFY_DONE; + i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); switch (event) { case NETDEV_DOWN: - i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - local_ipaddr6, - false, - I40IW_ARP_DELETE); - return NOTIFY_OK; + action = I40IW_ARP_DELETE; + /* Fall through */ case NETDEV_UP: /* Fall through */ case NETDEV_CHANGEADDR: - i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); i40iw_manage_arp_cache(iwdev, netdev->dev_addr, local_ipaddr6, false, - I40IW_ARP_ADD); + action); + i40iw_if_notify(iwdev, netdev, local_ipaddr6, false, + (action == I40IW_ARP_ADD) ? true : false); break; default: break; @@ -392,6 +372,7 @@ static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) i40iw_rem_pdusecount(iwqp->iwpd, iwdev); i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_rem_devusecount(iwdev); } /** @@ -415,7 +396,10 @@ static int i40iw_wait_event(struct i40iw_device *iwdev, i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n", info->cqp_cmd, timeout_ret); err_code = -ETIME; - i40iw_request_reset(iwdev); + if (!iwdev->reset) { + iwdev->reset = true; + i40iw_request_reset(iwdev); + } goto done; } cqp_error = cqp_request->compl_info.error; @@ -445,6 +429,11 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, struct cqp_commands_info *info = &cqp_request->info; int err_code = 0; + if (iwdev->reset) { + i40iw_free_cqp_request(&iwdev->cqp, cqp_request); + return I40IW_ERR_CQP_COMPL_ERROR; + } + status = i40iw_process_cqp_cmd(dev, info); if (status) { i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd); @@ -459,6 +448,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, } /** + * i40iw_add_devusecount - add dev refcount + * @iwdev: dev for refcount + */ +void i40iw_add_devusecount(struct i40iw_device *iwdev) +{ + atomic64_inc(&iwdev->use_count); +} + +/** + * i40iw_rem_devusecount - decrement refcount for dev + * @iwdev: device + */ +void i40iw_rem_devusecount(struct i40iw_device *iwdev) +{ + if (!atomic64_dec_and_test(&iwdev->use_count)) + return; + wake_up(&iwdev->close_wq); +} + +/** * i40iw_add_pdusecount - add pd refcount * @iwpd: pd for refcount */ @@ -712,6 +721,51 @@ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, } /** + * i40iw_qp_suspend_resume - cqp command for suspend/resume + * @dev: hardware control device structure + * @qp: hardware control qp + * @suspend: flag if suspend or resume + */ +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp_request *cqp_request; + struct i40iw_sc_cqp *cqp = dev->cqp; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME; + cqp_info->in.u.suspend_resume.cqp = cqp; + cqp_info->in.u.suspend_resume.qp = qp; + cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP QP Suspend/Resume fail"); +} + +/** + * i40iw_qp_mss_modify - modify mss for qp + * @dev: hardware control device structure + * @qp: hardware control qp + */ +void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; + struct i40iw_modify_qp_info info; + + memset(&info, 0, sizeof(info)); + info.mss_change = true; + info.new_mss = qp->vsi->mss; + i40iw_hw_modify_qp(iwdev, iwqp, &info, false); +} + +/** * i40iw_term_modify_qp - modify qp for term message * @qp: hardware control qp * @next_state: qp's next state @@ -769,6 +823,7 @@ static void i40iw_terminate_timeout(unsigned long context) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); + i40iw_rem_ref(&iwqp->ibqp); } /** @@ -780,6 +835,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; + i40iw_add_ref(&iwqp->ibqp); init_timer(&iwqp->terminate_timer); iwqp->terminate_timer.function = i40iw_terminate_timeout; iwqp->terminate_timer.expires = jiffies + HZ; @@ -796,7 +852,8 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - del_timer(&iwqp->terminate_timer); + if (del_timer(&iwqp->terminate_timer)) + i40iw_rem_ref(&iwqp->ibqp); } /** @@ -1011,6 +1068,116 @@ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) } /** + * i40iw_cqp_cq_create_cmd - create a cq for the cqp + * @dev: device pointer + * @cq: pointer to created cq + */ +enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, + struct i40iw_sc_cq *cq) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = OP_CQ_CREATE; + cqp_info->post_sq = 1; + cqp_info->in.u.cq_create.cq = cq; + cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP Create QP fail"); + + return status; +} + +/** + * i40iw_cqp_qp_create_cmd - create a qp for the cqp + * @dev: device pointer + * @qp: pointer to created qp + */ +enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, + struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + struct i40iw_create_qp_info *qp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; + + cqp_info = &cqp_request->info; + qp_info = &cqp_request->info.in.u.qp_create.info; + + memset(qp_info, 0, sizeof(*qp_info)); + + qp_info->cq_num_valid = true; + qp_info->next_iwarp_state = I40IW_QP_STATE_RTS; + + cqp_info->cqp_cmd = OP_QP_CREATE; + cqp_info->post_sq = 1; + cqp_info->in.u.qp_create.qp = qp; + cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP QP create fail"); + return status; +} + +/** + * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq + * @dev: device pointer + * @cq: pointer to cq + */ +void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + + i40iw_cq_wq_destroy(iwdev, cq); +} + +/** + * i40iw_cqp_qp_destroy_cmd - destroy the cqp + * @dev: device pointer + * @qp: pointer to qp + */ +void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + memset(cqp_info, 0, sizeof(*cqp_info)); + + cqp_info->cqp_cmd = OP_QP_DESTROY; + cqp_info->post_sq = 1; + cqp_info->in.u.qp_destroy.qp = qp; + cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; + cqp_info->in.u.qp_destroy.remove_hash_idx = true; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP QP_DESTROY fail"); +} + + +/** * i40iw_ieq_mpa_crc_ae - generate AE for crc error * @dev: hardware control device structure * @qp: hardware control qp @@ -1208,7 +1375,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in buf->totallen = pkt_len + buf->maclen; - if (info->payload_len < buf->totallen - 4) { + if (info->payload_len < buf->totallen) { i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n", info->payload_len, buf->totallen); return I40IW_ERR_INVALID_SIZE; @@ -1224,27 +1391,29 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @dev: hardware control device structure + * @vsi: pointer to the vsi structure */ -static void i40iw_hw_stats_timeout(unsigned long dev) +static void i40iw_hw_stats_timeout(unsigned long vsi) { - struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev; - struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat; - struct i40iw_dev_pestat *vf_devstat = NULL; + struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi; + struct i40iw_sc_dev *pf_dev = sc_vsi->dev; + struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat; + struct i40iw_vsi_pestat *vf_devstat = NULL; u16 iw_vf_idx; unsigned long flags; /*PF*/ - pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats); + i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats); + for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { - spin_lock_irqsave(&pf_devstat->stats_lock, flags); + spin_lock_irqsave(&pf_devstat->lock, flags); if (pf_dev->vf_dev[iw_vf_idx]) { if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) { - vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat; - vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats); + vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat; + i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats); } } - spin_unlock_irqrestore(&pf_devstat->stats_lock, flags); + spin_unlock_irqrestore(&pf_devstat->lock, flags); } mod_timer(&pf_devstat->stats_timer, @@ -1253,26 +1422,26 @@ static void i40iw_hw_stats_timeout(unsigned long dev) /** * i40iw_hw_stats_start_timer - Start periodic stats timer - * @dev: hardware control device structure + * @vsi: pointer to the vsi structure */ -void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev) +void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi) { - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = vsi->pestat; init_timer(&devstat->stats_timer); devstat->stats_timer.function = i40iw_hw_stats_timeout; - devstat->stats_timer.data = (unsigned long)dev; + devstat->stats_timer.data = (unsigned long)vsi; mod_timer(&devstat->stats_timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY)); } /** - * i40iw_hw_stats_del_timer - Delete periodic stats timer - * @dev: hardware control device structure + * i40iw_hw_stats_stop_timer - Delete periodic stats timer + * @vsi: pointer to the vsi structure */ -void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev) +void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi) { - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = vsi->pestat; del_timer_sync(&devstat->stats_timer); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22f..7368a50bbdaa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -37,6 +37,7 @@ #include <linux/random.h> #include <linux/highmem.h> #include <linux/time.h> +#include <linux/hugetlb.h> #include <asm/byteorder.h> #include <net/ip.h> #include <rdma/ib_verbs.h> @@ -67,13 +68,13 @@ static int i40iw_query_device(struct ib_device *ibdev, props->vendor_part_id = iwdev->ldev->pcidev->device; props->hw_ver = (u32)iwdev->sc_dev.hw_rev; props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; - props->max_qp = iwdev->max_qp; + props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1; props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; - props->max_cq = iwdev->max_cq; + props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; - props->max_mr = iwdev->max_mr; - props->max_pd = iwdev->max_pd; + props->max_mr = iwdev->max_mr - iwdev->used_mrs; + props->max_pd = iwdev->max_pd - iwdev->used_pds; props->max_sge_rd = I40IW_MAX_SGE_RD; props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; props->max_qp_init_rd_atom = props->max_qp_rd_atom; @@ -254,7 +255,6 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp { struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - struct i40iw_sc_dev *dev = &iwdev->sc_dev; enum i40iw_status_code status; if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) @@ -270,7 +270,7 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; cqp_info->post_sq = 1; - cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle; + cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 0; cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; @@ -292,7 +292,6 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ { struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - struct i40iw_sc_dev *dev = &iwdev->sc_dev; enum i40iw_status_code status; if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) @@ -307,7 +306,7 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ cqp_info->post_sq = 1; cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle; + cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 1; cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; @@ -337,6 +336,9 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, u32 pd_id = 0; int err; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds, iwdev->max_pd, &pd_id, &iwdev->next_pd); if (err) { @@ -602,6 +604,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, struct i40iwarp_offload_info *iwarp_info; unsigned long flags; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + if (init_attr->create_flags) return ERR_PTR(-EINVAL); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) @@ -610,11 +615,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) + init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + memset(&init_info, 0, sizeof(init_info)); sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; + init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.sq_size = sq_size; init_info.qp_uk_init_info.rq_size = rq_size; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; @@ -774,6 +783,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); + i40iw_add_devusecount(iwdev); if (ibpd->uobject && udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; @@ -815,8 +825,9 @@ static int i40iw_query_qp(struct ib_qp *ibqp, attr->qp_access_flags = 0; attr->cap.max_send_wr = qp->qp_uk.sq_size; attr->cap.max_recv_wr = qp->qp_uk.rq_size; - attr->cap.max_recv_sge = 1; attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; + attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; @@ -884,6 +895,11 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { + if (iwdev->closing && attr->qp_state != IB_QPS_ERR) { + err = -EINVAL; + goto exit; + } + switch (attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: @@ -944,7 +960,7 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto exit; } if (iwqp->sc_qp.term_flags) - del_timer(&iwqp->terminate_timer); + i40iw_terminate_del_timer(&iwqp->sc_qp); info.next_iwarp_state = I40IW_QP_STATE_ERROR; if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) && iwdev->iw_status && @@ -1037,11 +1053,11 @@ static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq) } /** - * cq_wq_destroy - send cq destroy cqp + * i40iw_cq_wq_destroy - send cq destroy cqp * @iwdev: iwarp device * @cq: hardware control cq */ -static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) +void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) { enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; @@ -1080,9 +1096,10 @@ static int i40iw_destroy_cq(struct ib_cq *ib_cq) iwcq = to_iwcq(ib_cq); iwdev = to_iwdev(ib_cq->device); cq = &iwcq->sc_cq; - cq_wq_destroy(iwdev, cq); + i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); kfree(iwcq); + i40iw_rem_devusecount(iwdev); return 0; } @@ -1113,6 +1130,9 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, int err_code; int entries = attr->cqe; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + if (entries > iwdev->max_cqe) return ERR_PTR(-EINVAL); @@ -1137,7 +1157,8 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, ukinfo->cq_id = cq_num; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; info.ceqe_mask = 0; - info.ceq_id = 0; + if (attr->comp_vector < iwdev->ceqs_count) + info.ceq_id = attr->comp_vector; info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = I40IW_CQ_TYPE_IWARP; @@ -1229,10 +1250,11 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, } } + i40iw_add_devusecount(iwdev); return (struct ib_cq *)iwcq; cq_destroy: - cq_wq_destroy(iwdev, cq); + i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources: cq_free_resources(iwdev, iwcq); error: @@ -1266,6 +1288,7 @@ static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag) stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT; i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx); + i40iw_rem_devusecount(iwdev); } /** @@ -1296,19 +1319,18 @@ static u32 i40iw_create_stag(struct i40iw_device *iwdev) stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT; stag |= driver_key; stag += (u32)consumer_key; + i40iw_add_devusecount(iwdev); } return stag; } /** * i40iw_next_pbl_addr - Get next pbl address - * @palloc: Poiner to allocated pbles * @pbl: pointer to a pble * @pinfo: info pointer * @idx: index */ -static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc, - u64 *pbl, +static inline u64 *i40iw_next_pbl_addr(u64 *pbl, struct i40iw_pble_info **pinfo, u32 *idx) { @@ -1336,9 +1358,11 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; struct i40iw_pble_info *pinfo; struct scatterlist *sg; + u64 pg_addr = 0; u32 idx = 0; pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf; + pg_shift = ffs(region->page_size) - 1; for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { chunk_pages = sg_dma_len(sg) >> pg_shift; @@ -1346,17 +1370,96 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, !iwpbl->qp_mr.sq_page) iwpbl->qp_mr.sq_page = sg_page(sg); for (i = 0; i < chunk_pages; i++) { - *pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i); - pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx); + pg_addr = sg_dma_address(sg) + region->page_size * i; + + if ((entry + i) == 0) + *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); + else if (!(pg_addr & ~iwmr->page_msk)) + *pbl = cpu_to_le64(pg_addr); + else + continue; + pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); + } + } +} + +/** + * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values. + * @addr: virtual address + * @iwmr: mr pointer for this memory registration + */ +static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) +{ + struct vm_area_struct *vma; + struct hstate *h; + + vma = find_vma(current->mm, addr); + if (vma && is_vm_hugetlb_page(vma)) { + h = hstate_vma(vma); + if (huge_page_size(h) == 0x200000) { + iwmr->page_size = huge_page_size(h); + iwmr->page_msk = huge_page_mask(h); } } } /** + * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous + * @arr: lvl1 pbl array + * @npages: page count + * pg_size: page size + * + */ +static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) +{ + u32 pg_idx; + + for (pg_idx = 0; pg_idx < npages; pg_idx++) { + if ((*arr + (pg_size * pg_idx)) != arr[pg_idx]) + return false; + } + return true; +} + +/** + * i40iw_check_mr_contiguous - check if MR is physically contiguous + * @palloc: pbl allocation struct + * pg_size: page size + */ +static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) +{ + struct i40iw_pble_level2 *lvl2 = &palloc->level2; + struct i40iw_pble_info *leaf = lvl2->leaf; + u64 *arr = NULL; + u64 *start_addr = NULL; + int i; + bool ret; + + if (palloc->level == I40IW_LEVEL_1) { + arr = (u64 *)palloc->level1.addr; + ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size); + return ret; + } + + start_addr = (u64 *)leaf->addr; + + for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) { + arr = (u64 *)leaf->addr; + if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr) + return false; + ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size); + if (!ret) + return false; + } + + return true; +} + +/** * i40iw_setup_pbles - copy user pg address to pble's * @iwdev: iwarp device * @iwmr: mr pointer for this memory registration - * @use_pbles: flag if to use pble's or memory (level 0) + * @use_pbles: flag if to use pble's */ static int i40iw_setup_pbles(struct i40iw_device *iwdev, struct i40iw_mr *iwmr, @@ -1369,9 +1472,6 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev, enum i40iw_status_code status; enum i40iw_pble_level level = I40IW_LEVEL_1; - if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS)) - return -ENOMEM; - if (use_pbles) { mutex_lock(&iwdev->pbl_mutex); status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); @@ -1388,6 +1488,10 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev, } i40iw_copy_user_pgaddrs(iwmr, pbl, level); + + if (use_pbles) + iwmr->pgaddrmem[0] = *pbl; + return 0; } @@ -1409,14 +1513,18 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev, struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr; struct i40iw_hmc_pble *hmc_p; u64 *arr = iwmr->pgaddrmem; + u32 pg_size; int err; int total; + bool ret = true; total = req->sq_pages + req->rq_pages + req->cq_pages; + pg_size = iwmr->page_size; err = i40iw_setup_pbles(iwdev, iwmr, use_pbles); if (err) return err; + if (use_pbles && (palloc->level != I40IW_LEVEL_1)) { i40iw_free_pble(iwdev->pble_rsrc, palloc); iwpbl->pbl_allocated = false; @@ -1425,26 +1533,44 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev, if (use_pbles) arr = (u64 *)palloc->level1.addr; - if (req->reg_type == IW_MEMREG_TYPE_QP) { + + if (iwmr->type == IW_MEMREG_TYPE_QP) { hmc_p = &qpmr->sq_pbl; qpmr->shadow = (dma_addr_t)arr[total]; + if (use_pbles) { + ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size); + if (ret) + ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size); + } + + if (!ret) { hmc_p->idx = palloc->level1.idx; hmc_p = &qpmr->rq_pbl; hmc_p->idx = palloc->level1.idx + req->sq_pages; } else { hmc_p->addr = arr[0]; hmc_p = &qpmr->rq_pbl; - hmc_p->addr = arr[1]; + hmc_p->addr = arr[req->sq_pages]; } } else { /* CQ */ hmc_p = &cqmr->cq_pbl; cqmr->shadow = (dma_addr_t)arr[total]; + if (use_pbles) + ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size); + + if (!ret) hmc_p->idx = palloc->level1.idx; else hmc_p->addr = arr[0]; } + + if (use_pbles && ret) { + i40iw_free_pble(iwdev->pble_rsrc, palloc); + iwpbl->pbl_allocated = false; + } + return err; } @@ -1642,8 +1768,9 @@ static int i40iw_hwreg_mr(struct i40iw_device *iwdev, stag_info->access_rights = access; stag_info->pd_id = iwpd->sc_pd.pd_id; stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED; + stag_info->page_size = iwmr->page_size; - if (iwmr->page_cnt > 1) { + if (iwpbl->pbl_allocated) { if (palloc->level == I40IW_LEVEL_1) { stag_info->first_pm_pbl_index = palloc->level1.idx; stag_info->chunk_size = 1; @@ -1699,6 +1826,11 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, bool use_pbles = false; unsigned long flags; int err = -ENOSYS; + int ret; + int pg_shift; + + if (iwdev->closing) + return ERR_PTR(-ENODEV); if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); @@ -1723,9 +1855,17 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; ucontext = to_ucontext(pd->uobject->context); - region_length = region->length + (start & 0xfff); - pbl_depth = region_length >> 12; - pbl_depth += (region_length & (4096 - 1)) ? 1 : 0; + + iwmr->page_size = region->page_size; + iwmr->page_msk = PAGE_MASK; + + if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM)) + i40iw_set_hugetlb_values(start, iwmr); + + region_length = region->length + (start & (iwmr->page_size - 1)); + pg_shift = ffs(iwmr->page_size) - 1; + pbl_depth = region_length >> pg_shift; + pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0; iwmr->length = region->length; iwpbl->user_base = virt; @@ -1755,13 +1895,21 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: + use_pbles = (iwmr->page_cnt != 1); access = I40IW_ACCESS_FLAGS_LOCALREAD; - use_pbles = (iwmr->page_cnt != 1); err = i40iw_setup_pbles(iwdev, iwmr, use_pbles); if (err) goto error; + if (use_pbles) { + ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size); + if (ret) { + i40iw_free_pble(iwdev->pble_rsrc, palloc); + iwpbl->pbl_allocated = false; + } + } + access |= i40iw_get_user_access(acc); stag = i40iw_create_stag(iwdev); if (!stag) { @@ -1778,6 +1926,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, i40iw_free_stag(iwdev, stag); goto error; } + break; default: goto error; @@ -1789,7 +1938,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, return &iwmr->ibmr; error: - if (palloc->level != I40IW_LEVEL_0) + if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); ib_umem_release(region); kfree(iwmr); @@ -2142,7 +2291,6 @@ static int i40iw_post_send(struct ib_qp *ibqp, case IB_WR_REG_MR: { struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); - int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); int flags = reg_wr(ib_wr)->access; struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; @@ -2153,6 +2301,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.access_rights |= i40iw_get_user_access(flags); info.stag_key = reg_wr(ib_wr)->key & 0xff; info.stag_idx = reg_wr(ib_wr)->key >> 8; + info.page_size = reg_wr(ib_wr)->mr->page_size; info.wr_id = ib_wr->wr_id; info.addr_type = I40IW_ADDR_TYPE_VA_BASED; @@ -2166,9 +2315,6 @@ static int i40iw_post_send(struct ib_qp *ibqp, if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR) info.chunk_size = 1; - if (page_shift == 21) - info.page_size = 1; /* 2M page */ - ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); if (ret) err = -ENOMEM; @@ -2487,21 +2633,17 @@ static int i40iw_get_hw_stats(struct ib_device *ibdev, { struct i40iw_device *iwdev = to_iwdev(ibdev); struct i40iw_sc_dev *dev = &iwdev->sc_dev; - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat; struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - unsigned long flags; if (dev->is_pf) { - spin_lock_irqsave(&devstat->stats_lock, flags); - devstat->ops.iw_hw_stat_read_all(devstat, - &devstat->hw_stats); - spin_unlock_irqrestore(&devstat->stats_lock, flags); + i40iw_hw_stats_read_all(devstat, &devstat->hw_stats); } else { if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) return -ENOSYS; } - memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats)); return stats->num_counters; } @@ -2562,7 +2704,9 @@ static int i40iw_query_pkey(struct ib_device *ibdev, * @ah_attr: address handle attributes */ static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr) + struct ib_ah_attr *attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } @@ -2621,7 +2765,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; - iwibdev->ibdev.num_comp_vectors = 1; + iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dma_device = &pcidev->dev; iwibdev->ibdev.dev.parent = &pcidev->dev; iwibdev->ibdev.query_port = i40iw_query_port; @@ -2654,7 +2798,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); - i40iw_pr_err("iwcm == NULL\n"); return NULL; } @@ -2719,6 +2862,9 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev) i40iw_unregister_rdma_device(iwibdev); kfree(iwibdev->ibdev.iwcm); iwibdev->ibdev.iwcm = NULL; + wait_event_timeout(iwibdev->iwdev->close_wq, + !atomic64_read(&iwibdev->iwdev->use_count), + I40IW_EVENT_TIMEOUT); ib_dealloc_device(&iwibdev->ibdev); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 0069be8a5a38..6549c939500f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -92,6 +92,8 @@ struct i40iw_mr { struct ib_umem *region; u16 type; u32 page_cnt; + u32 page_size; + u64 page_msk; u32 npages; u32 stag; u64 length; diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 3041003c94d2..f4d13683a403 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -403,6 +403,19 @@ del_out: } /** + * i40iw_vf_init_pestat - Initialize stats for VF + * @devL pointer to the VF Device + * @stats: Statistics structure pointer + * @index: Stats index + */ +static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index) +{ + stats->hw = dev->hw; + i40iw_hw_stats_init(stats, (u8)index, false); + spin_lock_init(&stats->lock); +} + +/** * i40iw_vchnl_recv_pf - Receive PF virtual channel messages * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message @@ -421,9 +434,8 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT; struct i40iw_virt_mem vf_dev_mem; struct i40iw_virtchnl_work_info work_info; - struct i40iw_dev_pestat *devstat; + struct i40iw_vsi_pestat *stats; enum i40iw_status_code ret_code; - unsigned long flags; if (!dev || !msg || !len) return I40IW_ERR_PARAM; @@ -496,14 +508,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, i40iw_debug(dev, I40IW_DEBUG_VIRT, "VF%u error CQP HMC Function operation.\n", vf_id); - ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat); - if (ret_code) - i40iw_debug(dev, I40IW_DEBUG_VIRT, - "VF%u - i40iw_device_init_pestat failed\n", - vf_id); - vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat, - (u8)vf_dev->pmf_index, - dev->hw, false); + i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index); vf_dev->stats_initialized = true; } else { if (vf_dev) { @@ -534,12 +539,10 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, case I40IW_VCHNL_OP_GET_STATS: if (!vf_dev) return I40IW_ERR_BAD_PTR; - devstat = &vf_dev->dev_pestat; - spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); - devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); - spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); + stats = &vf_dev->pestat; + i40iw_hw_stats_read_all(stats, &stats->hw_stats); vf_dev->msg_count--; - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats); break; default: i40iw_debug(dev, I40IW_DEBUG_VIRT, diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index b9bf0759f10a..077c33d2dc75 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -114,7 +114,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. */ @@ -122,12 +124,14 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx4_ib_ah *ah; struct ib_ah *ret; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 5e9939045852..06020c54db20 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -755,10 +755,8 @@ static void alias_guid_work(struct work_struct *work) struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); rec = kzalloc(sizeof *rec, GFP_KERNEL); - if (!rec) { - pr_err("alias_guid_work: No Memory\n"); + if (!rec) return; - } pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 39a488889fc7..d64845335e87 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -247,10 +247,8 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL); - if (!ent) { - mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n"); + if (!ent) return ERR_PTR(-ENOMEM); - } ent->sl_cm_id = sl_cm_id; ent->slave_id = slave_id; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1672907ff219..db564ccc0f92 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -39,6 +39,8 @@ #include <linux/mlx4/cmd.h> #include <linux/gfp.h> #include <rdma/ib_pma.h> +#include <linux/ip.h> +#include <net/ipv6.h> #include <linux/mlx4/driver.h> #include "mlx4_ib.h" @@ -480,6 +482,23 @@ static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, return -EINVAL; } +static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, + union ib_gid *dgid) +{ + int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh); + enum rdma_network_type net_type; + + if (version == 4) + net_type = RDMA_NETWORK_IPV4; + else if (version == 6) + net_type = RDMA_NETWORK_IPV6; + else + return -EINVAL; + + return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + sgid, dgid); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -538,7 +557,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid)) + return -EINVAL; attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -651,6 +673,11 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, is_eth = 1; if (is_eth) { + union ib_gid dgid; + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &dgid)) + return -EINVAL; if (!(wc->wc_flags & IB_WC_GRH)) { mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); return -EINVAL; @@ -659,10 +686,10 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } - err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave); if (err && mlx4_is_mf_bonded(dev->dev)) { other_port = (port == 1) ? 2 : 1; - err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave); if (!err) { port = other_port; pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n", @@ -702,10 +729,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; + if (grh->dgid.global.interface_id == + cpu_to_be64(IB_SA_WELL_KNOWN_GUID) && + grh->dgid.global.subnet_prefix == cpu_to_be64( + atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) { + slave = 0; + } else { + slave = mlx4_ib_find_real_gid(ibdev, port, + grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } } } /* Class-specific handling */ @@ -1102,10 +1137,8 @@ static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) { - mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); + if (!in_mad || !out_mad) goto out; - } guid_tbl_blk_num *= 4; @@ -1916,11 +1949,8 @@ static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, *ret_ctx = NULL; ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); - if (!ctx) { - pr_err("failed allocating pv resource context " - "for port %d, slave %d\n", port, slave); + if (!ctx) return -ENOMEM; - } ctx->ib_dev = &dev->ib_dev; ctx->port = port; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b597e8227591..c8413fc120e6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -430,7 +430,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + int err; int have_ib_ports; struct mlx4_uverbs_ex_query_device cmd; struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; @@ -455,6 +455,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(resp.response_length); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + err = -ENOMEM; if (!in_mad || !out_mad) goto out; @@ -547,6 +548,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; + props->max_ah = INT_MAX; if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); @@ -697,9 +699,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; @@ -2814,20 +2818,22 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * sizeof(long), GFP_KERNEL); - if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->persist->pdev->dev, - "bit map alloc failed\n"); + if (!ibdev->ib_uc_qpns_bitmap) goto err_steer_qp_release; - } - bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); - - err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( - dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_base + - ibdev->steer_qpn_count - 1); - if (err) - goto err_steer_free_bitmap; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) { + bitmap_zero(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } else { + bitmap_fill(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + } } for (j = 1; j <= ibdev->dev->caps.num_ports; j++) @@ -3055,15 +3061,12 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); - if (!dm) { - pr_err("failed to allocate memory for tunneling qp update\n"); + if (!dm) return; - } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { - pr_err("failed to allocate memory for tunneling qp update work struct\n"); while (--i >= 0) kfree(dm[i]); goto out; @@ -3223,8 +3226,6 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, ew->port = port; ew->ib_dev = ibdev; queue_work(wq, &ew->work); - } else { - pr_err("failed to allocate memory for sl2vl update work\n"); } } @@ -3284,10 +3285,8 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); - if (!ew) { - pr_err("failed to allocate memory for events work\n"); + if (!ew) break; - } INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index a21d37f02f35..e010fe459e67 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1142,7 +1142,6 @@ void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) work = kmalloc(sizeof *work, GFP_KERNEL); if (!work) { ctx->flushing = 0; - mcg_warn("failed allocating work for cleanup\n"); return; } @@ -1202,10 +1201,8 @@ static int push_deleteing_req(struct mcast_group *group, int slave) return 0; req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) { - mcg_warn_group(group, "failed allocation - may leave stall groups\n"); + if (!req) return -ENOMEM; - } if (!list_empty(&group->func[slave].pending)) { pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 35141f451e5c..7f3d976d81ed 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -742,7 +742,8 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 570bc866b1d6..c068add8838b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, int qpn; int err; struct ib_qp_cap backup_cap; - struct mlx4_ib_sqp *sqp; + struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; @@ -933,7 +933,9 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!*caller_qp) + if (sqp) + kfree(sqp); + else if (!*caller_qp) kfree(qp); return err; } @@ -1280,7 +1282,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - if (dev->qp1_proxy[mqp->port - 1] == mqp) { + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI && + dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); @@ -1764,14 +1767,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; - struct ib_gid_attr gid_attr; + struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; - if (is_eth) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 745efa4cfc71..d090e96f6f01 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -64,7 +64,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx5_ib_ah *ah; struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -75,6 +77,27 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (ll == IB_LINK_LAYER_ETHERNET && udata) { + int err; + struct mlx5_ib_create_ah_resp resp = {}; + u32 min_resp_len = offsetof(typeof(resp), dmac) + + sizeof(resp.dmac); + + if (udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + resp.response_length = min_resp_len; + + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + + memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + return ERR_PTR(err); + } + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fcd04b881ec1..b3ef47c3ab73 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -731,7 +731,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { - struct mlx5_ib_create_cq ucmd; + struct mlx5_ib_create_cq ucmd = {}; size_t ucmdlen; int page_shift; __be64 *pas; @@ -770,7 +770,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); @@ -792,8 +792,36 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->uuari.uars[0].index; + if (ucmd.cqe_comp_en == 1) { + if (unlikely((*cqe_size != 64) || + !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", + *cqe_size); + goto err_cqb; + } + + if (unlikely(!ucmd.cqe_comp_res_format || + !(ucmd.cqe_comp_res_format < + MLX5_IB_CQE_RES_RESERVED) || + (ucmd.cqe_comp_res_format & + (ucmd.cqe_comp_res_format - 1)))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", + ucmd.cqe_comp_res_format); + goto err_cqb; + } + + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + MLX5_SET(cqc, cqc, mini_cqe_res_format, + ilog2(ucmd.cqe_comp_res_format)); + } + return 0; +err_cqb: + kfree(cqb); + err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); @@ -1124,7 +1152,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, npas, NULL); cq->resize_umem = umem; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2be65ddf56ba..d566f6738833 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -127,7 +127,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { - struct ib_event ibev = {0}; + struct ib_event ibev = { }; ibev.device = &ibdev->ib_dev; ibev.event = (event == NETDEV_UP) ? @@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; + int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); @@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); + max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); @@ -643,6 +645,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; @@ -669,6 +672,40 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } + if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, + uhw->outlen)) { + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + resp.response_length += + sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); + } + + if (field_avail(typeof(resp), reserved, uhw->outlen)) + resp.response_length += sizeof(resp.reserved); + + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + resp.cqe_comp_caps.max_num = + MLX5_CAP_GEN(dev->mdev, cqe_compression) ? + MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; + resp.cqe_comp_caps.supported_format = + MLX5_IB_CQE_RES_FORMAT_HASH | + MLX5_IB_CQE_RES_FORMAT_CSUM; + resp.response_length += sizeof(resp.cqe_comp_caps); + } + + if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) { + if (MLX5_CAP_QOS(mdev, packet_pacing) && + MLX5_CAP_GEN(mdev, qos)) { + resp.packet_pacing_caps.qp_rate_limit_max = + MLX5_CAP_QOS(mdev, packet_pacing_max_rate); + resp.packet_pacing_caps.qp_rate_limit_min = + MLX5_CAP_QOS(mdev, packet_pacing_min_rate); + resp.packet_pacing_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + } + resp.response_length += sizeof(resp.packet_pacing_caps); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@ -1093,7 +1130,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.cqe_version); if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; resp.response_length += sizeof(resp.cmds_supp_uhw); } @@ -1502,6 +1540,22 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } +static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) { MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); @@ -1515,6 +1569,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -1527,155 +1582,164 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) static int parse_flow_attr(u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec) { - void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *headers_c; + void *headers_v; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + } - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) return -ENOTSUPP; - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), ib_spec->eth.val.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), ib_spec->eth.val.src_mac); if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_cfi, ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, ntohs(ib_spec->eth.val.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, ntohs(ib_spec->eth.val.vlan_tag) >> 13); } - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.src_ip, sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.src_ip, sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.dst_ip, sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); - set_tos(outer_headers_c, outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.src_ip, sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.src_ip, sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.dst_ip, sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); - set_tos(outer_headers_c, outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv6.mask.next_hdr, ib_spec->ipv6.val.next_hdr); - MLX5_SET(fte_match_set_misc, misc_params_c, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.mask.flow_label)); - MLX5_SET(fte_match_set_misc, misc_params_v, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.val.flow_label)); + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: @@ -1683,21 +1747,31 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -ENOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; default: return -EINVAL; } @@ -2721,6 +2795,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); int err; err = mlx5_ib_query_port(ibdev, port_num, &attr); @@ -2730,7 +2806,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev); - immutable->max_mad_size = IB_MGMT_MAD_SIZE; + if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) + immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } @@ -2744,7 +2821,7 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } -static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) +static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, @@ -2773,7 +2850,7 @@ err_destroy_vport_lag: return err; } -static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -2785,7 +2862,21 @@ static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) } } -static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) +static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) +{ + int err; + + dev->roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce.nb); + if (err) { + dev->roce.nb.notifier_call = NULL; + return err; + } + + return 0; +} + +static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) { if (dev->roce.nb.notifier_call) { unregister_netdevice_notifier(&dev->roce.nb); @@ -2793,39 +2884,40 @@ static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) } } -static int mlx5_enable_roce(struct mlx5_ib_dev *dev) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - dev->roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce.nb); - if (err) { - dev->roce.nb.notifier_call = NULL; + err = mlx5_add_netdev_notifier(dev); + if (err) return err; - } - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - goto err_unregister_netdevice_notifier; + if (MLX5_CAP_GEN(dev->mdev, roce)) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + goto err_unregister_netdevice_notifier; + } - err = mlx5_roce_lag_init(dev); + err = mlx5_eth_lag_init(dev); if (err) goto err_disable_roce; return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); return err; } -static void mlx5_disable_roce(struct mlx5_ib_dev *dev) +static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { - mlx5_roce_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_eth_lag_cleanup(dev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@ -2947,9 +3039,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) - return NULL; - printk_once(KERN_INFO "%s", mlx5_version); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); @@ -2995,6 +3084,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | @@ -3017,7 +3108,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; @@ -3128,14 +3220,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_enable_roce(dev); + err = mlx5_enable_eth(dev); if (err) goto err_free_port; } err = create_dev_resources(&dev->devr); if (err) - goto err_disable_roce; + goto err_disable_eth; err = mlx5_ib_odp_init_one(dev); if (err) @@ -3179,10 +3271,10 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); -err_disable_roce: +err_disable_eth: if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_roce(dev); - mlx5_remove_roce_notifier(dev); + mlx5_disable_eth(dev); + mlx5_remove_netdev_notifier(dev); } err_free_port: @@ -3199,14 +3291,14 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) - mlx5_disable_roce(dev); + mlx5_disable_eth(dev); kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 996b54e366b0..6851357c16f4 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -37,12 +37,15 @@ /* @umem: umem object to scan * @addr: ib virtual address requested by the user + * @max_page_shift: high limit for page_shift - 0 means no limit * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region * @ncont: number of compund pages * @order: log2 of the number of compound pages */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order) { unsigned long tmp; @@ -72,6 +75,8 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); + if (max_page_shift) + m = min_t(unsigned long, max_page_shift - page_shift, m); skip = 1 << m; mask = skip - 1; i = 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 854748b61212..6c6057eb60ea 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -63,6 +63,8 @@ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) +#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) + enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, @@ -387,6 +389,7 @@ struct mlx5_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + u32 rate_limit; }; struct mlx5_ib_cq_buf { @@ -418,7 +421,7 @@ struct mlx5_umr_wr { struct ib_pd *pd; unsigned int page_shift; unsigned int npages; - u32 length; + u64 length; int access_flags; u32 mkey; }; @@ -739,7 +742,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, @@ -825,7 +829,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4e9012463c37..8f608debe141 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -628,7 +628,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + (mlx5_core_is_pf(dev->mdev))) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; @@ -646,6 +647,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } +static void wait_for_async_commands(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int total = 0; + int i; + int j; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + for (j = 0 ; j < 1000; j++) { + if (!ent->pending) + break; + msleep(50); + } + } + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + total += ent->pending; + } + + if (total) + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); + else + mlx5_ib_warn(dev, "done with all pending requests\n"); +} + int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -659,6 +687,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); + wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; @@ -816,29 +845,34 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } -static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, int *npages, - int *page_shift, int *ncont, int *order) +static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, struct ib_umem **umem, + int *npages, int *page_shift, int *ncont, + int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - if (IS_ERR(umem)) { + int err; + + *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + err = PTR_ERR_OR_ZERO(*umem); + if (err < 0) { mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; + return err; } - mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(umem); - return ERR_PTR(-EINVAL); + ib_umem_release(*umem); + return -EINVAL; } mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); - return umem; + return 0; } static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1164,11 +1198,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = mr_umem_get(pd, start, length, access_flags, &npages, + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (IS_ERR(umem)) - return (void *)umem; + if (err < 0) + return ERR_PTR(err); if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1345,10 +1379,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); - mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, - &page_shift, &ncont, &order); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); + err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, + &npages, &page_shift, &ncont, &order); + if (err < 0) { mr->umem = NULL; return err; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d1e921816bfe..a1b3125f0a6e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,12 +77,14 @@ struct mlx5_wqe_eth_pad { enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, + MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, }; struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ + u32 rate_limit; u8 rq_q_ctr_id; }; @@ -351,6 +353,29 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } +static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) +{ + int max_sge; + + if (attr->qp_type == IB_QPT_RC) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else if (attr->qp_type == IB_QPT_XRC_INI) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_xrc_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else + max_sge = (wqe_size - sq_overhead(attr)) / + sizeof(struct mlx5_wqe_data_seg); + + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / + sizeof(struct mlx5_wqe_data_seg)); +} + static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { @@ -381,13 +406,18 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { - mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n", + attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB, qp->sq.wqe_cnt, 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); - qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_gs = get_send_sge(attr, wqe_size); + if (qp->sq.max_gs < attr->cap.max_send_sge) + return -ENOMEM; + + attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; @@ -647,7 +677,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -700,7 +730,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); @@ -2442,8 +2472,14 @@ out: } static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, int new_state) + struct mlx5_ib_sq *sq, + int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { + struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; + u32 old_rate = ibqp->rate_limit; + u32 new_rate = old_rate; + u16 rl_index = 0; void *in; void *sqc; int inlen; @@ -2459,10 +2495,44 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(sqc, sqc, state, new_state); + if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) { + if (new_state != MLX5_SQC_STATE_RDY) + pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", + __func__); + else + new_rate = raw_qp_param->rate_limit; + } + + if (old_rate != new_rate) { + if (new_rate) { + err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (err) { + pr_err("Failed configuring rate %u: %d\n", + new_rate, err); + goto out; + } + } + + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } + err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); - if (err) + if (err) { + /* Remove new rate from table if failed */ + if (new_rate && + old_rate != new_rate) + mlx5_rl_remove_rate(dev, new_rate); goto out; + } + + /* Only remove the old rate after new rate was set */ + if ((old_rate && + (old_rate != new_rate)) || + (new_state != MLX5_SQC_STATE_RDY)) + mlx5_rl_remove_rate(dev, old_rate); + ibqp->rate_limit = new_rate; sq->state = new_state; out: @@ -2477,6 +2547,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + int modify_rq = !!qp->rq.wqe_cnt; + int modify_sq = !!qp->sq.wqe_cnt; int rq_state; int sq_state; int err; @@ -2494,10 +2566,18 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq_state = MLX5_RQC_STATE_RST; sq_state = MLX5_SQC_STATE_RST; break; - case MLX5_CMD_OP_INIT2INIT_QP: - case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: + if (raw_qp_param->set_mask == + MLX5_RAW_QP_RATE_LIMIT) { + modify_rq = 0; + sq_state = sq->state; + } else { + return raw_qp_param->set_mask ? -EINVAL : 0; + } + break; + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: if (raw_qp_param->set_mask) return -EINVAL; else @@ -2507,13 +2587,13 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EINVAL; } - if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); + if (modify_rq) { + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } - if (qp->sq.wqe_cnt) { + if (modify_sq) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity); @@ -2521,7 +2601,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); } return 0; @@ -2577,7 +2657,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; - int sqd_event; int mlx5_st; int err; u16 op; @@ -2724,12 +2803,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && - attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) - sqd_event = 1; - else - sqd_event = 0; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; @@ -2776,6 +2849,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + + if (attr_mask & IB_QP_RATE_LIMIT) { + raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; + } + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, @@ -3067,10 +3146,10 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) { memset(umr, 0, sizeof(*umr)); umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; + umr->flags = MLX5_UMR_INLINE; } -static __be64 get_umr_reg_mr_mask(void) +static __be64 get_umr_reg_mr_mask(int atomic) { u64 result; @@ -3083,9 +3162,11 @@ static __be64 get_umr_reg_mr_mask(void) MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -3146,7 +3227,7 @@ static __be64 get_umr_update_pd_mask(void) } static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr) + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3171,7 +3252,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) umr->mkey_mask |= get_umr_update_pd_mask(); if (!umr->mkey_mask) - umr->mkey_mask = get_umr_reg_mr_mask(); + umr->mkey_mask = get_umr_reg_mr_mask(atomic); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -4024,7 +4105,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr); + set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3857dbd9c956..6f4397ee1ed6 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -118,7 +118,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); @@ -203,8 +203,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { - mlx5_ib_dbg(dev, "kmalloc failed %lu\n", - (unsigned long)(srq->msrq.max * sizeof(u64))); err = -ENOMEM; goto err_in; } @@ -282,6 +280,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + in.type = init_attr->srq_type; if (pd->uobject) err = create_srq_user(pd, srq, &in, udata, buf_size); @@ -294,7 +293,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in.type = init_attr->srq_type; in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index bcac294042f5..c9f0f364f484 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -186,8 +186,8 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { - ah->av = pci_pool_alloc(dev->av_table.pool, - GFP_ATOMIC, &ah->avdma); + ah->av = pci_pool_zalloc(dev->av_table.pool, + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -196,8 +196,6 @@ on_hca_fail: ah->key = pd->ntmr.ibmr.lkey; - memset(av, 0, MTHCA_AV_SIZE); - av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); av->g_slid = ah_attr->src_path_bits; av->dlid = cpu_to_be16(ah_attr->dlid); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 358930a41e36..d31708742ba5 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -410,7 +410,9 @@ static int mthca_dealloc_pd(struct ib_pd *pd) } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { int err; struct mthca_ah *ah; diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 6727af27c017..2a6979e4ae1c 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -96,8 +96,6 @@ int mthca_reset(struct mthca_dev *mdev) hca_header = kmalloc(256, GFP_KERNEL); if (!hca_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " - "PCI header, aborting.\n"); goto put_dev; } @@ -119,8 +117,6 @@ int mthca_reset(struct mthca_dev *mdev) bridge_header = kmalloc(256, GFP_KERNEL); if (!bridge_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " - "bridge PCI header, aborting.\n"); goto free_hca; } diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 2baa45a8e401..5b9601014f0c 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -515,7 +515,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) /* Allocate hardware structure */ nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); if (!nesdev) { - printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev)); ret = -ENOMEM; goto bail2; } diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 57db9b332f44..8e703479e7ce 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2282,10 +2282,8 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, if (!listener) { /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); - if (!listener) { - nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n"); + if (!listener) return NULL; - } listener->loc_addr = cm_info->loc_addr; listener->loc_port = cm_info->loc_port; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index a1c6481d8038..19acd13c6cb1 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -351,9 +351,8 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { /* allocate a new adapter struct */ nesadapter = kzalloc(adapter_size, GFP_KERNEL); - if (nesadapter == NULL) { + if (!nesadapter) return NULL; - } nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); @@ -1007,8 +1006,7 @@ int nes_init_cqp(struct nes_device *nesdev) /* Allocate a twice the number of CQP requests as the SQ size */ nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); - if (nesdev->nes_cqp_requests == NULL) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n"); + if (!nesdev->nes_cqp_requests) { pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, nesdev->cqp.sq_pbase); return -ENOMEM; diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 416645259b0f..33624f17c347 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -320,8 +320,7 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, /* Found one */ fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); - if (fpdu_info == NULL) { - nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n"); + if (!fpdu_info) { rc = -ENOMEM; goto out; } @@ -729,8 +728,7 @@ static int nes_change_quad_hash(struct nes_device *nesdev, } qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); - if (qh_chg == NULL) { - nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + if (!qh_chg) { ret = -ENOMEM; goto chg_qh_err; } @@ -880,10 +878,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct /* Allocate space the all mgt QPs once */ mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL); - if (mgtvnic == NULL) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n"); + if (!mgtvnic) return -ENOMEM; - } /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ /* We are not sending from this NIC so sq is not allocated */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 7f8597d6738b..5921ea3d50ae 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -662,10 +662,14 @@ tso_sq_no_longer_full: nesnic->sq_head &= nesnic->sq_size-1; } } else { - nesvnic->linearized_skbs++; hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; - skb_linearize(skb); + if (skb_linearize(skb)) { + nesvnic->tx_sw_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; + } + nesvnic->linearized_skbs++; skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); if (!nes_nic_send(skb, netdev)) @@ -1461,7 +1465,8 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, /** * nes_netdev_get_settings */ -static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +static int nes_netdev_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1470,54 +1475,59 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd u8 phy_type = nesadapter->phy_type[mac_index]; u8 phy_index = nesadapter->phy_index[mac_index]; u16 phy_data; + u32 supported, advertising; - et_cmd->duplex = DUPLEX_FULL; - et_cmd->port = PORT_MII; - et_cmd->maxtxpkt = 511; - et_cmd->maxrxpkt = 511; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; if (nesadapter->OneG_Mode) { - ethtool_cmd_speed_set(et_cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; if (phy_type == NES_PHY_TYPE_PUMA_1G) { - et_cmd->supported = SUPPORTED_1000baseT_Full; - et_cmd->advertising = ADVERTISED_1000baseT_Full; - et_cmd->autoneg = AUTONEG_DISABLE; - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->phy_address = mac_index; + supported = SUPPORTED_1000baseT_Full; + advertising = ADVERTISED_1000baseT_Full; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.phy_address = mac_index; } else { unsigned long flags; - et_cmd->supported = SUPPORTED_1000baseT_Full - | SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full - | ADVERTISED_Autoneg; + + supported = SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg; + advertising = ADVERTISED_1000baseT_Full + | ADVERTISED_Autoneg; spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); spin_unlock_irqrestore(&nesadapter->phy_lock, flags); if (phy_data & 0x1000) - et_cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - et_cmd->autoneg = AUTONEG_DISABLE; - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = phy_index; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.phy_address = phy_index; } + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); return 0; } if ((phy_type == NES_PHY_TYPE_ARGUS) || (phy_type == NES_PHY_TYPE_SFP_D) || (phy_type == NES_PHY_TYPE_KR)) { - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; - et_cmd->advertising = ADVERTISED_FIBRE; - et_cmd->phy_address = phy_index; + cmd->base.port = PORT_FIBRE; + supported = SUPPORTED_FIBRE; + advertising = ADVERTISED_FIBRE; + cmd->base.phy_address = phy_index; } else { - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; - et_cmd->advertising = ADVERTISED_10000baseT_Full; - et_cmd->phy_address = mac_index; + supported = SUPPORTED_10000baseT_Full; + advertising = ADVERTISED_10000baseT_Full; + cmd->base.phy_address = mac_index; } - ethtool_cmd_speed_set(et_cmd, SPEED_10000); - et_cmd->autoneg = AUTONEG_DISABLE; + cmd->base.speed = SPEED_10000; + cmd->base.autoneg = AUTONEG_DISABLE; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } @@ -1525,7 +1535,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd /** * nes_netdev_set_settings */ -static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +static int +nes_netdev_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1539,7 +1551,7 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - if (et_cmd->autoneg) { + if (cmd->base.autoneg) { /* Turn on Full duplex, Autoneg, and restart autonegotiation */ phy_data |= 0x1300; } else { @@ -1556,8 +1568,6 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd static const struct ethtool_ops nes_ethtool_ops = { .get_link = ethtool_op_get_link, - .get_settings = nes_netdev_get_settings, - .set_settings = nes_netdev_set_settings, .get_strings = nes_netdev_get_strings, .get_sset_count = nes_netdev_get_sset_count, .get_ethtool_stats = nes_netdev_get_ethtool_stats, @@ -1566,6 +1576,8 @@ static const struct ethtool_ops nes_ethtool_ops = { .set_coalesce = nes_netdev_set_coalesce, .get_pauseparam = nes_netdev_get_pauseparam, .set_pauseparam = nes_netdev_set_pauseparam, + .get_link_ksettings = nes_netdev_get_link_ksettings, + .set_link_ksettings = nes_netdev_set_link_ksettings, }; static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index bd69125731c1..aff9fb14768b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -771,7 +771,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** * nes_create_ah */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } @@ -1075,7 +1076,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL); if (!mem) { nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - nes_debug(NES_DBG_QP, "Unable to allocate QP\n"); return ERR_PTR(-ENOMEM); } u64nesqp = (unsigned long)mem; @@ -1475,7 +1475,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); if (!nescq) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n"); return ERR_PTR(-ENOMEM); } @@ -2408,7 +2407,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); if (!nespbl) { - nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); ib_umem_release(region); return ERR_PTR(-ENOMEM); } @@ -2416,7 +2414,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!nesmr) { ib_umem_release(region); kfree(nespbl); - nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n"); return ERR_PTR(-ENOMEM); } nesmr->region = region; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 797362a297b2..14d33b0f3950 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -154,7 +154,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { u32 *ahid_addr; int status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 3856dd4c7e3d..0704a24b17c8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -50,7 +50,9 @@ enum { OCRDMA_AH_L3_TYPE_MASK = 0x03, OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); + +struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *, + struct ib_udata *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 67fc0b6857e1..9a305201545e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1596,10 +1596,9 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev) dev->pd_mgr = kzalloc(sizeof(struct ocrdma_pd_resource_mgr), GFP_KERNEL); - if (!dev->pd_mgr) { - pr_err("%s(%d)Memory allocation failure.\n", __func__, dev->id); + if (!dev->pd_mgr) return; - } + status = ocrdma_mbx_alloc_pd_range(dev); if (status) { pr_err("%s(%d) Unable to initialize PD pool, using default.\n", @@ -1642,7 +1641,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size, static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) { int i; - int status = 0; + int status = -ENOMEM; int max_ah; struct ocrdma_create_ah_tbl *cmd; struct ocrdma_create_ah_tbl_rsp *rsp; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 8bef09a8c49f..f8e4b0a6486f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -84,10 +84,8 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev) /* Alloc debugfs mem */ mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); - if (!mem->debugfs_mem) { - pr_err("%s: stats debugfs mem allocation failed\n", __func__); + if (!mem->debugfs_mem) return false; - } return true; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a61514296767..302fb05e6e6f 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -511,8 +511,10 @@ int qedr_dealloc_pd(struct ib_pd *ibpd) struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); - if (!pd) + if (!pd) { pr_err("Invalid PD received in dealloc_pd\n"); + return -EINVAL; + } DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); @@ -1477,6 +1479,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; struct qedr_qp *qp; + struct ib_qp *ibqp; int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", @@ -1486,13 +1489,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, if (rc) return ERR_PTR(rc); + if (attrs->srq) + return ERR_PTR(-EINVAL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - if (attrs->srq) - return ERR_PTR(-EINVAL); - DP_DEBUG(dev, QEDR_MSG_QP, "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", get_qedr_cq(attrs->send_cq), @@ -1508,7 +1511,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, "create qp: unexpected udata when creating GSI QP\n"); goto err0; } - return qedr_create_gsi_qp(dev, attrs, qp); + ibqp = qedr_create_gsi_qp(dev, attrs, qp); + if (IS_ERR(ibqp)) + kfree(qp); + return ibqp; } memset(&in_params, 0, sizeof(in_params)); @@ -2094,7 +2100,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp) return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { struct qedr_ah *ah; @@ -2413,8 +2420,7 @@ static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) */ pbl = list_first_entry(&info->inuse_pbl_list, struct qedr_pbl, list_entry); - list_del(&pbl->list_entry); - list_add_tail(&pbl->list_entry, &info->free_pbl_list); + list_move_tail(&pbl->list_entry, &info->free_pbl_list); info->completed_handled++; } } @@ -2981,11 +2987,6 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return -EINVAL; } - if (!wr) { - DP_ERR(dev, "Got an empty post send.\n"); - return -EINVAL; - } - while (wr) { rc = __qedr_post_send(ibqp, wr, bad_wr); if (rc) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index a9b5e67bb81e..070677ca4d19 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,8 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); int qedr_dereg_mr(struct ib_mr *); diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 8c34b23e5bf6..775018b32b0d 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -609,8 +609,6 @@ static ssize_t qib_diagpkt_write(struct file *fp, tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, - "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -702,10 +700,8 @@ int qib_register_observer(struct qib_devdata *dd, if (!dd || !op) return -EINVAL; olp = vmalloc(sizeof(*olp)); - if (!olp) { - pr_err("vmalloc for observer failed\n"); + if (!olp) return -ENOMEM; - } spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); olp->op = op; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 728e0a030d2e..2b5982f743ef 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc( - &qp->refcount); + rvt_get_qp(qp); list_add_tail( &qp->rspwait, &rcd->qp_wait_list); diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 311ee6c3dd5e..33a2e74c8495 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -182,12 +182,8 @@ void qib_get_eeprom_info(struct qib_devdata *dd) * */ len = sizeof(struct qib_flash); buf = vmalloc(len); - if (!buf) { - qib_dev_err(dd, - "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", - len); + if (!buf) goto bail; - } /* * Use "public" eeprom read function, which does locking and diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 382466a90da7..2d1eacf1dfed 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2066,8 +2066,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + if (!ib_safe_file_access(fp)) { + pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (count < sizeof(cmd.type)) { ret = -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a3733f25280f..92399d3ffd15 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1759,9 +1759,7 @@ static void pe_boardname(struct qib_devdata *dd) } namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) @@ -2533,8 +2531,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr6120names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr6120names; s; i++) s = strchr(s + 1, '\n'); @@ -2542,8 +2538,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd) dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1; dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->portcntrs) - qib_dev_err(dd, "Failed allocation for portcounters\n"); } static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep, diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 00b2af211157..e55e31a69195 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2070,9 +2070,7 @@ static void qib_7220_boardname(struct qib_devdata *dd) namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) @@ -3179,8 +3177,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr7220names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr7220names; s; i++) s = strchr(s + 1, '\n'); @@ -3188,8 +3184,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd) dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1; dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->portcntrs) - qib_dev_err(dd, "Failed allocation for portcounters\n"); } static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep, diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ded27172320e..c4a3616062f1 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3627,9 +3627,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); snprintf(dd->boardversion, sizeof(dd->boardversion), @@ -3656,7 +3654,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) static int qib_do_7322_reset(struct qib_devdata *dd) { u64 val; - u64 *msix_vecsave; + u64 *msix_vecsave = NULL; int i, msix_entries, ret = 1; u16 cmdval; u8 int_line, clinesz; @@ -3677,10 +3675,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) /* can be up to 512 bytes, too big for stack */ msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries * sizeof(u64), GFP_KERNEL); - if (!msix_vecsave) - qib_dev_err(dd, "No mem to save MSIx data\n"); - } else - msix_vecsave = NULL; + } /* * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector @@ -5043,8 +5038,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr7322names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr7322names; s; i++) s = strchr(s + 1, '\n'); @@ -5053,9 +5046,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; ++i) { dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, - "Failed allocation for portcounters\n"); } } @@ -6461,7 +6451,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) sizeof(*dd->cspec->sendibchk), GFP_KERNEL); if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk || !dd->cspec->sendibchk) { - qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n"); ret = -ENOMEM; goto bail; } @@ -7338,10 +7327,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, tabsize = actual_cnt; dd->cspec->msix_entries = kzalloc(tabsize * sizeof(struct qib_msix_entry), GFP_KERNEL); - if (!dd->cspec->msix_entries) { - qib_dev_err(dd, "No memory for MSIx table\n"); + if (!dd->cspec->msix_entries) tabsize = 0; - } + for (i = 0; i < tabsize; i++) dd->cspec->msix_entries[i].msix.entry = i; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 1730aa839a47..b50240b1d5a4 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -133,11 +133,8 @@ int qib_create_ctxts(struct qib_devdata *dd) * cleanup iterates across all possible ctxts. */ dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL); - if (!dd->rcd) { - qib_dev_err(dd, - "Unable to allocate ctxtdata array, failing\n"); + if (!dd->rcd) return -ENOMEM; - } /* create (one or more) kctxt */ for (i = 0; i < dd->first_user_ctxt; ++i) { @@ -265,39 +262,23 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) * IB_CCT_ENTRIES; ppd->ccti_entries = kzalloc(size, GFP_KERNEL); - if (!ppd->ccti_entries) { - qib_dev_err(dd, - "failed to allocate congestion control table for port %d!\n", - port); + if (!ppd->ccti_entries) goto bail; - } size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); ppd->congestion_entries = kzalloc(size, GFP_KERNEL); - if (!ppd->congestion_entries) { - qib_dev_err(dd, - "failed to allocate congestion setting list for port %d!\n", - port); + if (!ppd->congestion_entries) goto bail_1; - } size = sizeof(struct cc_table_shadow); ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); - if (!ppd->ccti_entries_shadow) { - qib_dev_err(dd, - "failed to allocate shadow ccti list for port %d!\n", - port); + if (!ppd->ccti_entries_shadow) goto bail_2; - } size = sizeof(struct ib_cc_congestion_setting_attr); ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); - if (!ppd->congestion_entries_shadow) { - qib_dev_err(dd, - "failed to allocate shadow congestion setting list for port %d!\n", - port); + if (!ppd->congestion_entries_shadow) goto bail_3; - } return 0; @@ -391,18 +372,12 @@ static void init_shadow_tids(struct qib_devdata *dd) dma_addr_t *addrs; pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); - if (!pages) { - qib_dev_err(dd, - "failed to allocate shadow page * array, no expected sends!\n"); + if (!pages) goto bail; - } addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); - if (!addrs) { - qib_dev_err(dd, - "failed to allocate shadow dma handle array, no expected sends!\n"); + if (!addrs) goto bail_free; - } dd->pageshadow = pages; dd->physshadow = addrs; @@ -1026,11 +1001,8 @@ static void qib_verify_pioperf(struct qib_devdata *dd) cnt = 1024; addr = vmalloc(cnt); - if (!addr) { - qib_devinfo(dd->pcidev, - "Couldn't get memory for checking PIO perf, skipping\n"); + if (!addr) goto done; - } preempt_disable(); /* we want reasonably accurate elapsed time */ msecs = 1 + jiffies_to_msecs(jiffies); @@ -1172,9 +1144,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) sizeof(long), GFP_KERNEL); if (qib_cpulist) qib_cpulist_count = count; - else - qib_early_err(&pdev->dev, - "Could not alloc cpulist info, cpu affinity might be wrong\n"); } #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_init(&dd->verbs_dev); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 2097512e75aa..031433cb7206 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before resending, @@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct qib_ibport *ibp) { - struct ib_wc wc; - unsigned i; - /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); @@ -2112,8 +2079,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index de1bde5950f5..e54a2feeeb10 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 954f15064514..4b54c0ddd08a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -114,19 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); /* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_qib_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - -/* * System image GUID. */ __be64 ib_qib_sys_image_guid; @@ -464,7 +451,7 @@ static void mem_timer(unsigned long data) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); } @@ -477,8 +464,7 @@ static void mem_timer(unsigned long data) qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); @@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } else spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); } @@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) break; avail -= qpp->s_tx->txreq.sg_count; list_del_init(&qpp->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } @@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) qib_schedule_send(qp); } spin_unlock(&qp->s_lock); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } dd->f_wantpiobuf_intr(dd, 0); @@ -1306,8 +1290,7 @@ full: spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify qib_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 5b0248adf4ce..092d4e11a633 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -117,10 +117,10 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); res_chunk = get_qp_res_chunk(qp_grp); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get qp res with err %ld\n", PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } for (i = 0; i < res_chunk->cnt; i++) { @@ -158,10 +158,10 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); res_chunk = get_qp_res_chunk(qp_grp); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get qp res with err %ld\n", PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } for (i = 0; i < res_chunk->cnt; i++) { @@ -186,11 +186,11 @@ static int init_filter_action(struct usnic_ib_qp_grp *qp_grp, struct usnic_vnic_res_chunk *res_chunk; res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get %s with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); @@ -228,8 +228,6 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); if (IS_ERR_OR_NULL(flow)) { - usnic_err("Unable to alloc flow failed with err %ld\n", - PTR_ERR(flow)); err = flow ? PTR_ERR(flow) : -EFAULT; goto out_unreserve_port; } @@ -303,8 +301,6 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); if (IS_ERR_OR_NULL(flow)) { - usnic_err("Unable to alloc flow failed with err %ld\n", - PTR_ERR(flow)); err = flow ? PTR_ERR(flow) : -EFAULT; goto out_put_sock; } @@ -694,18 +690,14 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, } qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); - if (!qp_grp) { - usnic_err("Unable to alloc qp_grp - Out of memory\n"); + if (!qp_grp) return NULL; - } qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec, qp_grp); if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) { err = qp_grp->res_chunk_list ? PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; - usnic_err("Unable to alloc res for %d with err %d\n", - qp_grp->grp_id, err); goto out_free_qp_grp; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a5bfbba6bbac..74819a7951e2 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -87,12 +87,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.bar_len = bar->len; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ); @@ -101,12 +101,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.rq_idx[i] = chunk->res[i]->vnic_idx; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ); @@ -115,12 +115,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.wq_idx[i] = chunk->res[i]->vnic_idx; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ); @@ -738,7 +738,9 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { usnic_dbg("\n"); return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 0d9d2e6a14d5..0ed8e072329e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,7 +75,9 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); + int usnic_ib_destroy_ah(struct ib_ah *ah); int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c index 887510718690..e7b0030254da 100644 --- a/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -241,17 +241,12 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, return ERR_PTR(-EINVAL); ret = kzalloc(sizeof(*ret), GFP_ATOMIC); - if (!ret) { - usnic_err("Failed to allocate chunk for %s - Out of memory\n", - usnic_vnic_pci_name(vnic)); + if (!ret) return ERR_PTR(-ENOMEM); - } if (cnt > 0) { ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC); if (!ret->res) { - usnic_err("Failed to allocate resources for %s. Out of memory\n", - usnic_vnic_pci_name(vnic)); kfree(ret); return ERR_PTR(-ENOMEM); } @@ -311,8 +306,10 @@ static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic, struct usnic_vnic_res *res; cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type)); - if (cnt < 1) + if (cnt < 1) { + usnic_err("Wrong res count with cnt %d\n", cnt); return -EINVAL; + } chunk->cnt = chunk->free_cnt = cnt; chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL); @@ -384,12 +381,8 @@ static int usnic_vnic_discover_resources(struct pci_dev *pdev, res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { err = usnic_vnic_alloc_res_chunk(vnic, res_type, &vnic->chunks[res_type]); - if (err) { - usnic_err("Failed to alloc res %s with err %d\n", - usnic_vnic_res_type_to_str(res_type), - err); + if (err) goto out_clean_chunks; - } } return 0; @@ -454,11 +447,8 @@ struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev) } vnic = kzalloc(sizeof(*vnic), GFP_KERNEL); - if (!vnic) { - usnic_err("Failed to alloc vnic for %s - out of memory\n", - pci_name(pdev)); + if (!vnic) return ERR_PTR(-ENOMEM); - } spin_lock_init(&vnic->res_lock); diff --git a/drivers/infiniband/hw/vmw_pvrdma/Kconfig b/drivers/infiniband/hw/vmw_pvrdma/Kconfig new file mode 100644 index 000000000000..5a9790ac0ede --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/Kconfig @@ -0,0 +1,7 @@ +config INFINIBAND_VMWARE_PVRDMA + tristate "VMware Paravirtualized RDMA Driver" + depends on NETDEVICES && ETHERNET && PCI && INET && VMXNET3 + ---help--- + This driver provides low-level support for VMware Paravirtual + RDMA adapter. It interacts with the VMXNet3 driver to provide + Ethernet capabilities. diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile new file mode 100644 index 000000000000..0194ed19f542 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o + +vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h new file mode 100644 index 000000000000..71e1d55d69d6 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_H__ +#define __PVRDMA_H__ + +#include <linux/compiler.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_verbs.h> +#include <rdma/vmw_pvrdma-abi.h> + +#include "pvrdma_ring.h" +#include "pvrdma_dev_api.h" +#include "pvrdma_verbs.h" + +/* NOT the same as BIT_MASK(). */ +#define PVRDMA_MASK(n) ((n << 1) - 1) + +/* + * VMware PVRDMA PCI device id. + */ +#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 + +struct pvrdma_dev; + +struct pvrdma_page_dir { + dma_addr_t dir_dma; + u64 *dir; + int ntables; + u64 **tables; + u64 npages; + void **pages; +}; + +struct pvrdma_cq { + struct ib_cq ibcq; + int offset; + spinlock_t cq_lock; /* Poll lock. */ + struct pvrdma_uar_map *uar; + struct ib_umem *umem; + struct pvrdma_ring_state *ring_state; + struct pvrdma_page_dir pdir; + u32 cq_handle; + bool is_kernel; + atomic_t refcnt; + wait_queue_head_t wait; +}; + +struct pvrdma_id_table { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; /* Table lock. */ + unsigned long *table; +}; + +struct pvrdma_uar_map { + unsigned long pfn; + void __iomem *map; + int index; +}; + +struct pvrdma_uar_table { + struct pvrdma_id_table tbl; + int size; +}; + +struct pvrdma_ucontext { + struct ib_ucontext ibucontext; + struct pvrdma_dev *dev; + struct pvrdma_uar_map uar; + u64 ctx_handle; +}; + +struct pvrdma_pd { + struct ib_pd ibpd; + u32 pdn; + u32 pd_handle; + int privileged; +}; + +struct pvrdma_mr { + u32 mr_handle; + u64 iova; + u64 size; +}; + +struct pvrdma_user_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct pvrdma_mr mmr; + struct pvrdma_page_dir pdir; + u64 *pages; + u32 npages; + u32 max_pages; + u32 page_shift; +}; + +struct pvrdma_wq { + struct pvrdma_ring *ring; + spinlock_t lock; /* Work queue lock. */ + int wqe_cnt; + int wqe_size; + int max_sg; + int offset; +}; + +struct pvrdma_ah { + struct ib_ah ibah; + struct pvrdma_av av; +}; + +struct pvrdma_qp { + struct ib_qp ibqp; + u32 qp_handle; + u32 qkey; + struct pvrdma_wq sq; + struct pvrdma_wq rq; + struct ib_umem *rumem; + struct ib_umem *sumem; + struct pvrdma_page_dir pdir; + int npages; + int npages_send; + int npages_recv; + u32 flags; + u8 port; + u8 state; + bool is_kernel; + struct mutex mutex; /* QP state mutex. */ + atomic_t refcnt; + wait_queue_head_t wait; +}; + +struct pvrdma_dev { + /* PCI device-related information. */ + struct ib_device ib_dev; + struct pci_dev *pdev; + void __iomem *regs; + struct pvrdma_device_shared_region *dsr; /* Shared region pointer */ + dma_addr_t dsrbase; /* Shared region base address */ + void *cmd_slot; + void *resp_slot; + unsigned long flags; + struct list_head device_link; + + /* Locking and interrupt information. */ + spinlock_t cmd_lock; /* Command lock. */ + struct semaphore cmd_sema; + struct completion cmd_done; + struct { + enum pvrdma_intr_type type; /* Intr type */ + struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS]; + irq_handler_t handler[PVRDMA_MAX_INTERRUPTS]; + u8 enabled[PVRDMA_MAX_INTERRUPTS]; + u8 size; + } intr; + + /* RDMA-related device information. */ + union ib_gid *sgid_tbl; + struct pvrdma_ring_state *async_ring_state; + struct pvrdma_page_dir async_pdir; + struct pvrdma_ring_state *cq_ring_state; + struct pvrdma_page_dir cq_pdir; + struct pvrdma_cq **cq_tbl; + spinlock_t cq_tbl_lock; + struct pvrdma_qp **qp_tbl; + spinlock_t qp_tbl_lock; + struct pvrdma_uar_table uar_table; + struct pvrdma_uar_map driver_uar; + __be64 sys_image_guid; + spinlock_t desc_lock; /* Device modification lock. */ + u32 port_cap_mask; + struct mutex port_mutex; /* Port modification mutex. */ + bool ib_active; + atomic_t num_qps; + atomic_t num_cqs; + atomic_t num_pds; + atomic_t num_ahs; + + /* Network device information. */ + struct net_device *netdev; + struct notifier_block nb_netdev; +}; + +struct pvrdma_netdevice_work { + struct work_struct work; + struct net_device *event_netdev; + unsigned long event; +}; + +static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct pvrdma_dev, ib_dev); +} + +static inline struct +pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct pvrdma_ucontext, ibucontext); +} + +static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct pvrdma_pd, ibpd); +} + +static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct pvrdma_cq, ibcq); +} + +static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct pvrdma_user_mr, ibmr); +} + +static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct pvrdma_qp, ibqp); +} + +static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah) +{ + return container_of(ibah, struct pvrdma_ah, ibah); +} + +static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val) +{ + writel(cpu_to_le32(val), dev->regs + reg); +} + +static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg) +{ + return le32_to_cpu(readl(dev->regs + reg)); +} + +static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val) +{ + writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET); +} + +static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val) +{ + writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET); +} + +static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir, + u64 offset) +{ + return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE); +} + +static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu) +{ + return (enum pvrdma_mtu)mtu; +} + +static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu) +{ + return (enum ib_mtu)mtu; +} + +static inline enum pvrdma_port_state ib_port_state_to_pvrdma( + enum ib_port_state state) +{ + return (enum pvrdma_port_state)state; +} + +static inline enum ib_port_state pvrdma_port_state_to_ib( + enum pvrdma_port_state state) +{ + return (enum ib_port_state)state; +} + +static inline int ib_port_cap_flags_to_pvrdma(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX); +} + +static inline int pvrdma_port_cap_flags_to_ib(int flags) +{ + return flags; +} + +static inline enum pvrdma_port_width ib_port_width_to_pvrdma( + enum ib_port_width width) +{ + return (enum pvrdma_port_width)width; +} + +static inline enum ib_port_width pvrdma_port_width_to_ib( + enum pvrdma_port_width width) +{ + return (enum ib_port_width)width; +} + +static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma( + enum ib_port_speed speed) +{ + return (enum pvrdma_port_speed)speed; +} + +static inline enum ib_port_speed pvrdma_port_speed_to_ib( + enum pvrdma_port_speed speed) +{ + return (enum ib_port_speed)speed; +} + +static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) +{ + return attr_mask; +} + +static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) +{ + return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); +} + +static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma( + enum ib_mig_state state) +{ + return (enum pvrdma_mig_state)state; +} + +static inline enum ib_mig_state pvrdma_mig_state_to_ib( + enum pvrdma_mig_state state) +{ + return (enum ib_mig_state)state; +} + +static inline int ib_access_flags_to_pvrdma(int flags) +{ + return flags; +} + +static inline int pvrdma_access_flags_to_ib(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX); +} + +static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type) +{ + return (enum pvrdma_qp_type)type; +} + +static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type) +{ + return (enum ib_qp_type)type; +} + +static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state) +{ + return (enum pvrdma_qp_state)state; +} + +static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state) +{ + return (enum ib_qp_state)state; +} + +static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) +{ + return (enum pvrdma_wr_opcode)op; +} + +static inline enum ib_wc_status pvrdma_wc_status_to_ib( + enum pvrdma_wc_status status) +{ + return (enum ib_wc_status)status; +} + +static inline int pvrdma_wc_opcode_to_ib(int opcode) +{ + return opcode; +} + +static inline int pvrdma_wc_flags_to_ib(int flags) +{ + return flags; +} + +static inline int ib_send_flags_to_pvrdma(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); +} + +void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, + const struct pvrdma_qp_cap *src); +void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, + const struct ib_qp_cap *src); +void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src); +void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src); +void pvrdma_global_route_to_ib(struct ib_global_route *dst, + const struct pvrdma_global_route *src); +void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst, + const struct ib_global_route *src); +void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst, + const struct pvrdma_ah_attr *src); +void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, + const struct ib_ah_attr *src); + +int pvrdma_uar_table_init(struct pvrdma_dev *dev); +void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev); + +int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar); +void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar); + +void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq); + +int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir, + u64 npages, bool alloc_pages); +void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir); +int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, + dma_addr_t daddr); +int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, + struct ib_umem *umem, u64 offset); +dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx); +int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir, + u64 *page_list, int num_pages); + +int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp, unsigned resp_code); + +#endif /* __PVRDMA_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c new file mode 100644 index 000000000000..4a78c537d8a1 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> + +#include "pvrdma.h" + +#define PVRDMA_CMD_TIMEOUT 10000 /* ms */ + +static inline int pvrdma_cmd_recv(struct pvrdma_dev *dev, + union pvrdma_cmd_resp *resp, + unsigned resp_code) +{ + int err; + + dev_dbg(&dev->pdev->dev, "receive response from device\n"); + + err = wait_for_completion_interruptible_timeout(&dev->cmd_done, + msecs_to_jiffies(PVRDMA_CMD_TIMEOUT)); + if (err == 0 || err == -ERESTARTSYS) { + dev_warn(&dev->pdev->dev, + "completion timeout or interrupted\n"); + return -ETIMEDOUT; + } + + spin_lock(&dev->cmd_lock); + memcpy(resp, dev->resp_slot, sizeof(*resp)); + spin_unlock(&dev->cmd_lock); + + if (resp->hdr.ack != resp_code) { + dev_warn(&dev->pdev->dev, + "unknown response %#x expected %#x\n", + resp->hdr.ack, resp_code); + return -EFAULT; + } + + return 0; +} + +int +pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *resp, unsigned resp_code) +{ + int err; + + dev_dbg(&dev->pdev->dev, "post request to device\n"); + + /* Serializiation */ + down(&dev->cmd_sema); + + BUILD_BUG_ON(sizeof(union pvrdma_cmd_req) != + sizeof(struct pvrdma_cmd_modify_qp)); + + spin_lock(&dev->cmd_lock); + memcpy(dev->cmd_slot, req, sizeof(*req)); + spin_unlock(&dev->cmd_lock); + + init_completion(&dev->cmd_done); + pvrdma_write_reg(dev, PVRDMA_REG_REQUEST, 0); + + /* Make sure the request is written before reading status. */ + mb(); + + err = pvrdma_read_reg(dev, PVRDMA_REG_ERR); + if (err == 0) { + if (resp != NULL) + err = pvrdma_cmd_recv(dev, resp, resp_code); + } else { + dev_warn(&dev->pdev->dev, + "failed to write request error reg: %d\n", err); + err = -EFAULT; + } + + up(&dev->cmd_sema); + + return err; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c new file mode 100644 index 000000000000..e429ca5b16aa --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "pvrdma.h" + +/** + * pvrdma_req_notify_cq - request notification for a completion queue + * @ibcq: the completion queue + * @notify_flags: notification flags + * + * @return: 0 for success. + */ +int pvrdma_req_notify_cq(struct ib_cq *ibcq, + enum ib_cq_notify_flags notify_flags) +{ + struct pvrdma_dev *dev = to_vdev(ibcq->device); + struct pvrdma_cq *cq = to_vcq(ibcq); + u32 val = cq->cq_handle; + + val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM; + + pvrdma_write_uar_cq(dev, val); + + return 0; +} + +/** + * pvrdma_create_cq - create completion queue + * @ibdev: the device + * @attr: completion queue attributes + * @context: user context + * @udata: user data + * + * @return: ib_cq completion queue pointer on success, + * otherwise returns negative errno. + */ +struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int entries = attr->cqe; + struct pvrdma_dev *dev = to_vdev(ibdev); + struct pvrdma_cq *cq; + int ret; + int npages; + unsigned long flags; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_cq *cmd = &req.create_cq; + struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; + struct pvrdma_create_cq ucmd; + + BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + + entries = roundup_pow_of_two(entries); + if (entries < 1 || entries > dev->dsr->caps.max_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq)) + return ERR_PTR(-ENOMEM); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + atomic_dec(&dev->num_cqs); + return ERR_PTR(-ENOMEM); + } + + cq->ibcq.cqe = entries; + + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_cq; + } + + cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(cq->umem)) { + ret = PTR_ERR(cq->umem); + goto err_cq; + } + + npages = ib_umem_page_count(cq->umem); + } else { + cq->is_kernel = true; + + /* One extra page for shared ring state */ + npages = 1 + (entries * sizeof(struct pvrdma_cqe) + + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Skip header page. */ + cq->offset = PAGE_SIZE; + } + + if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in completion queue\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + /* Ring state is always the first page. Set in library for user cq. */ + if (cq->is_kernel) + cq->ring_state = cq->pdir.pages[0]; + else + pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); + + atomic_set(&cq->refcnt, 1); + init_waitqueue_head(&cq->wait); + spin_lock_init(&cq->cq_lock); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ; + cmd->nchunks = npages; + cmd->ctx_handle = (context) ? + (u64)to_vucontext(context)->ctx_handle : 0; + cmd->cqe = entries; + cmd->pdir_dma = cq->pdir.dir_dma; + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create completion queue, error: %d\n", ret); + goto err_page_dir; + } + + cq->ibcq.cqe = resp->cqe; + cq->cq_handle = resp->cq_handle; + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (context) { + cq->uar = &(to_vucontext(context)->uar); + + /* Copy udata back. */ + if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + pvrdma_destroy_cq(&cq->ibcq); + return ERR_PTR(-EINVAL); + } + } + + return &cq->ibcq; + +err_page_dir: + pvrdma_page_dir_cleanup(dev, &cq->pdir); +err_umem: + if (context) + ib_umem_release(cq->umem); +err_cq: + atomic_dec(&dev->num_cqs); + kfree(cq); + + return ERR_PTR(ret); +} + +static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) +{ + atomic_dec(&cq->refcnt); + wait_event(cq->wait, !atomic_read(&cq->refcnt)); + + if (!cq->is_kernel) + ib_umem_release(cq->umem); + + pvrdma_page_dir_cleanup(dev, &cq->pdir); + kfree(cq); +} + +/** + * pvrdma_destroy_cq - destroy completion queue + * @cq: the completion queue to destroy. + * + * @return: 0 for success. + */ +int pvrdma_destroy_cq(struct ib_cq *cq) +{ + struct pvrdma_cq *vcq = to_vcq(cq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq; + struct pvrdma_dev *dev = to_vdev(cq->device); + unsigned long flags; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ; + cmd->cq_handle = vcq->cq_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "could not destroy completion queue, error: %d\n", + ret); + + /* free cq's resources */ + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + dev->cq_tbl[vcq->cq_handle] = NULL; + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + pvrdma_free_cq(dev, vcq); + atomic_dec(&dev->num_cqs); + + return ret; +} + +/** + * pvrdma_modify_cq - modify the CQ moderation parameters + * @ibcq: the CQ to modify + * @cq_count: number of CQEs that will trigger an event + * @cq_period: max period of time in usec before triggering an event + * + * @return: -EOPNOTSUPP as CQ resize is not supported. + */ +int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + return -EOPNOTSUPP; +} + +static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) +{ + return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( + &cq->pdir, + cq->offset + + sizeof(struct pvrdma_cqe) * i); +} + +void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq) +{ + int head; + int has_data; + + if (!cq->is_kernel) + return; + + /* Lock held */ + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (unlikely(has_data > 0)) { + int items; + int curr; + int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail, + cq->ibcq.cqe); + struct pvrdma_cqe *cqe; + struct pvrdma_cqe *curr_cqe; + + items = (tail > head) ? (tail - head) : + (cq->ibcq.cqe - head + tail); + curr = --tail; + while (items-- > 0) { + if (curr < 0) + curr = cq->ibcq.cqe - 1; + if (tail < 0) + tail = cq->ibcq.cqe - 1; + curr_cqe = get_cqe(cq, curr); + if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) { + if (curr != tail) { + cqe = get_cqe(cq, tail); + *cqe = *curr_cqe; + } + tail--; + } else { + pvrdma_idx_ring_inc( + &cq->ring_state->rx.cons_head, + cq->ibcq.cqe); + } + curr--; + } + } +} + +static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp, + struct ib_wc *wc) +{ + struct pvrdma_dev *dev = to_vdev(cq->ibcq.device); + int has_data; + unsigned int head; + bool tried = false; + struct pvrdma_cqe *cqe; + +retry: + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (has_data == 0) { + if (tried) + return -EAGAIN; + + pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL); + + tried = true; + goto retry; + } else if (has_data == PVRDMA_INVALID_IDX) { + dev_err(&dev->pdev->dev, "CQ ring state invalid\n"); + return -EAGAIN; + } + + cqe = get_cqe(cq, head); + + /* Ensure cqe is valid. */ + rmb(); + if (dev->qp_tbl[cqe->qp & 0xffff]) + *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff]; + else + return -EAGAIN; + + wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode); + wc->status = pvrdma_wc_status_to_ib(cqe->status); + wc->wr_id = cqe->wr_id; + wc->qp = &(*cur_qp)->ibqp; + wc->byte_len = cqe->byte_len; + wc->ex.imm_data = cqe->imm_data; + wc->src_qp = cqe->src_qp; + wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags); + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->slid; + wc->sl = cqe->sl; + wc->dlid_path_bits = cqe->dlid_path_bits; + wc->port_num = cqe->port_num; + wc->vendor_err = 0; + + /* Update shared ring state */ + pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); + + return 0; +} + +/** + * pvrdma_poll_cq - poll for work completion queue entries + * @ibcq: completion queue + * @num_entries: the maximum number of entries + * @entry: pointer to work completion array + * + * @return: number of polled completion entries + */ +int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct pvrdma_cq *cq = to_vcq(ibcq); + struct pvrdma_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + + if (num_entries < 1 || wc == NULL) + return 0; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (npolled = 0; npolled < num_entries; ++npolled) { + if (pvrdma_poll_one(cq, &cur_qp, wc + npolled)) + break; + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + /* Ensure we do not return errors from poll_cq */ + return npolled; +} + +/** + * pvrdma_resize_cq - resize CQ + * @ibcq: the completion queue + * @entries: CQ entries + * @udata: user data + * + * @return: -EOPNOTSUPP as CQ resize is not supported. + */ +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h new file mode 100644 index 000000000000..c06768635d65 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_DEV_API_H__ +#define __PVRDMA_DEV_API_H__ + +#include <linux/types.h> + +#include "pvrdma_verbs.h" + +#define PVRDMA_VERSION 17 +#define PVRDMA_BOARD_ID 1 +#define PVRDMA_REV_ID 1 + +/* + * Masks and accessors for page directory, which is a two-level lookup: + * page directory -> page table -> page. Only one directory for now, but we + * could expand that easily. 9 bits for tables, 9 bits for pages, gives one + * gigabyte for memory regions and so forth. + */ + +#define PVRDMA_PDIR_SHIFT 18 +#define PVRDMA_PTABLE_SHIFT 9 +#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1) +#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff) +#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff) +#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512) +#define PVRDMA_MAX_FAST_REG_PAGES 128 + +/* + * Max MSI-X vectors. + */ + +#define PVRDMA_MAX_INTERRUPTS 3 + +/* Register offsets within PCI resource on BAR1. */ +#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */ +#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */ +#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */ +#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */ +#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */ +#define PVRDMA_REG_ERR 0x14 /* R: Device error. */ +#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */ +#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */ +#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */ +#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */ + +/* Object flags. */ +#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */ +#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */ +#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */ +#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */ + +/* + * Atomic operation capability (masked versions are extended atomic + * operations. + */ + +#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */ +#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */ +#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */ +#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */ + +/* + * Base Memory Management Extension flags to support Fast Reg Memory Regions + * and Fast Reg Work Requests. Each flag represents a verb operation and we + * must support all of them to qualify for the BMME device cap. + */ + +#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */ +#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */ +#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */ + +/* + * GID types. The interpretation of the gid_types bit field in the device + * capabilities will depend on the device mode. For now, the device only + * supports RoCE as mode, so only the different GID types for RoCE are + * defined. + */ + +#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0) +#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1) + +enum pvrdma_pci_resource { + PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ + PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */ + PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */ + PVRDMA_PCI_RESOURCE_LAST, /* Last. */ +}; + +enum pvrdma_device_ctl { + PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ + PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */ + PVRDMA_DEVICE_CTL_RESET, /* Reset device. */ +}; + +enum pvrdma_intr_vector { + PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */ + PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */ + PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */ + /* Additional CQ notification vectors. */ +}; + +enum pvrdma_intr_cause { + PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE), + PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC), + PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ), +}; + +enum pvrdma_intr_type { + PVRDMA_INTR_TYPE_INTX, /* Legacy. */ + PVRDMA_INTR_TYPE_MSI, /* MSI. */ + PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */ +}; + +enum pvrdma_gos_bits { + PVRDMA_GOS_BITS_UNK, /* Unknown. */ + PVRDMA_GOS_BITS_32, /* 32-bit. */ + PVRDMA_GOS_BITS_64, /* 64-bit. */ +}; + +enum pvrdma_gos_type { + PVRDMA_GOS_TYPE_UNK, /* Unknown. */ + PVRDMA_GOS_TYPE_LINUX, /* Linux. */ +}; + +enum pvrdma_device_mode { + PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */ + PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */ + PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */ +}; + +struct pvrdma_gos_info { + u32 gos_bits:2; /* W: PVRDMA_GOS_BITS_ */ + u32 gos_type:4; /* W: PVRDMA_GOS_TYPE_ */ + u32 gos_ver:16; /* W: Guest OS version. */ + u32 gos_misc:10; /* W: Other. */ + u32 pad; /* Pad to 8-byte alignment. */ +}; + +struct pvrdma_device_caps { + u64 fw_ver; /* R: Query device. */ + __be64 node_guid; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u64 atomic_arg_sizes; /* EX verbs. */ + u32 ex_comp_mask; /* EX verbs. */ + u32 device_cap_flags2; /* EX verbs. */ + u32 max_fa_bit_boundary; /* EX verbs. */ + u32 log_max_atomic_inline_arg; /* EX verbs. */ + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u32 max_qp; + u32 max_qp_wr; + u32 device_cap_flags; + u32 max_sge; + u32 max_sge_rd; + u32 max_cq; + u32 max_cqe; + u32 max_mr; + u32 max_pd; + u32 max_qp_rd_atom; + u32 max_ee_rd_atom; + u32 max_res_rd_atom; + u32 max_qp_init_rd_atom; + u32 max_ee_init_rd_atom; + u32 max_ee; + u32 max_rdd; + u32 max_mw; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 max_mcast_grp; + u32 max_mcast_qp_attach; + u32 max_total_mcast_qp_attach; + u32 max_ah; + u32 max_fmr; + u32 max_map_per_fmr; + u32 max_srq; + u32 max_srq_wr; + u32 max_srq_sge; + u32 max_uar; + u32 gid_tbl_len; + u16 max_pkeys; + u8 local_ca_ack_delay; + u8 phys_port_cnt; + u8 mode; /* PVRDMA_DEVICE_MODE_ */ + u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */ + u8 bmme_flags; /* FRWR Mem Mgmt Extensions */ + u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */ + u8 reserved[4]; +}; + +struct pvrdma_ring_page_info { + u32 num_pages; /* Num pages incl. header. */ + u32 reserved; /* Reserved. */ + u64 pdir_dma; /* Page directory PA. */ +}; + +#pragma pack(push, 1) + +struct pvrdma_device_shared_region { + u32 driver_version; /* W: Driver version. */ + u32 pad; /* Pad to 8-byte align. */ + struct pvrdma_gos_info gos_info; /* W: Guest OS information. */ + u64 cmd_slot_dma; /* W: Command slot address. */ + u64 resp_slot_dma; /* W: Response slot address. */ + struct pvrdma_ring_page_info async_ring_pages; + /* W: Async ring page info. */ + struct pvrdma_ring_page_info cq_ring_pages; + /* W: CQ ring page info. */ + u32 uar_pfn; /* W: UAR pageframe. */ + u32 pad2; /* Pad to 8-byte align. */ + struct pvrdma_device_caps caps; /* R: Device capabilities. */ +}; + +#pragma pack(pop) + +/* Event types. Currently a 1:1 mapping with enum ib_event. */ +enum pvrdma_eqe_type { + PVRDMA_EVENT_CQ_ERR, + PVRDMA_EVENT_QP_FATAL, + PVRDMA_EVENT_QP_REQ_ERR, + PVRDMA_EVENT_QP_ACCESS_ERR, + PVRDMA_EVENT_COMM_EST, + PVRDMA_EVENT_SQ_DRAINED, + PVRDMA_EVENT_PATH_MIG, + PVRDMA_EVENT_PATH_MIG_ERR, + PVRDMA_EVENT_DEVICE_FATAL, + PVRDMA_EVENT_PORT_ACTIVE, + PVRDMA_EVENT_PORT_ERR, + PVRDMA_EVENT_LID_CHANGE, + PVRDMA_EVENT_PKEY_CHANGE, + PVRDMA_EVENT_SM_CHANGE, + PVRDMA_EVENT_SRQ_ERR, + PVRDMA_EVENT_SRQ_LIMIT_REACHED, + PVRDMA_EVENT_QP_LAST_WQE_REACHED, + PVRDMA_EVENT_CLIENT_REREGISTER, + PVRDMA_EVENT_GID_CHANGE, +}; + +/* Event queue element. */ +struct pvrdma_eqe { + u32 type; /* Event type. */ + u32 info; /* Handle, other. */ +}; + +/* CQ notification queue element. */ +struct pvrdma_cqne { + u32 info; /* Handle */ +}; + +enum { + PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PKEY, + PVRDMA_CMD_CREATE_PD, + PVRDMA_CMD_DESTROY_PD, + PVRDMA_CMD_CREATE_MR, + PVRDMA_CMD_DESTROY_MR, + PVRDMA_CMD_CREATE_CQ, + PVRDMA_CMD_RESIZE_CQ, + PVRDMA_CMD_DESTROY_CQ, + PVRDMA_CMD_CREATE_QP, + PVRDMA_CMD_MODIFY_QP, + PVRDMA_CMD_QUERY_QP, + PVRDMA_CMD_DESTROY_QP, + PVRDMA_CMD_CREATE_UC, + PVRDMA_CMD_DESTROY_UC, + PVRDMA_CMD_CREATE_BIND, + PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_MAX, +}; + +enum { + PVRDMA_CMD_FIRST_RESP = (1 << 31), + PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP, + PVRDMA_CMD_QUERY_PKEY_RESP, + PVRDMA_CMD_CREATE_PD_RESP, + PVRDMA_CMD_DESTROY_PD_RESP_NOOP, + PVRDMA_CMD_CREATE_MR_RESP, + PVRDMA_CMD_DESTROY_MR_RESP_NOOP, + PVRDMA_CMD_CREATE_CQ_RESP, + PVRDMA_CMD_RESIZE_CQ_RESP, + PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, + PVRDMA_CMD_CREATE_QP_RESP, + PVRDMA_CMD_MODIFY_QP_RESP, + PVRDMA_CMD_QUERY_QP_RESP, + PVRDMA_CMD_DESTROY_QP_RESP, + PVRDMA_CMD_CREATE_UC_RESP, + PVRDMA_CMD_DESTROY_UC_RESP_NOOP, + PVRDMA_CMD_CREATE_BIND_RESP_NOOP, + PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_MAX_RESP, +}; + +struct pvrdma_cmd_hdr { + u64 response; /* Key for response lookup. */ + u32 cmd; /* PVRDMA_CMD_ */ + u32 reserved; /* Reserved. */ +}; + +struct pvrdma_cmd_resp_hdr { + u64 response; /* From cmd hdr. */ + u32 ack; /* PVRDMA_CMD_XXX_RESP */ + u8 err; /* Error. */ + u8 reserved[3]; /* Reserved. */ +}; + +struct pvrdma_cmd_query_port { + struct pvrdma_cmd_hdr hdr; + u8 port_num; + u8 reserved[7]; +}; + +struct pvrdma_cmd_query_port_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_port_attr attrs; +}; + +struct pvrdma_cmd_query_pkey { + struct pvrdma_cmd_hdr hdr; + u8 port_num; + u8 index; + u8 reserved[6]; +}; + +struct pvrdma_cmd_query_pkey_resp { + struct pvrdma_cmd_resp_hdr hdr; + u16 pkey; + u8 reserved[6]; +}; + +struct pvrdma_cmd_create_uc { + struct pvrdma_cmd_hdr hdr; + u32 pfn; /* UAR page frame number */ + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_uc_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_uc { + struct pvrdma_cmd_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_pd { + struct pvrdma_cmd_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_pd_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 pd_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_pd { + struct pvrdma_cmd_hdr hdr; + u32 pd_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_mr { + struct pvrdma_cmd_hdr hdr; + u64 start; + u64 length; + u64 pdir_dma; + u32 pd_handle; + u32 access_flags; + u32 flags; + u32 nchunks; +}; + +struct pvrdma_cmd_create_mr_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 mr_handle; + u32 lkey; + u32 rkey; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_mr { + struct pvrdma_cmd_hdr hdr; + u32 mr_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_cq { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 ctx_handle; + u32 cqe; + u32 nchunks; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 cq_handle; + u32 cqe; +}; + +struct pvrdma_cmd_resize_cq { + struct pvrdma_cmd_hdr hdr; + u32 cq_handle; + u32 cqe; +}; + +struct pvrdma_cmd_resize_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 cqe; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_cq { + struct pvrdma_cmd_hdr hdr; + u32 cq_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_qp { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 pd_handle; + u32 send_cq_handle; + u32 recv_cq_handle; + u32 srq_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; + u32 lkey; + u32 access_flags; + u16 total_chunks; + u16 send_chunks; + u16 max_atomic_arg; + u8 sq_sig_all; + u8 qp_type; + u8 is_srq; + u8 reserved[3]; +}; + +struct pvrdma_cmd_create_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +struct pvrdma_cmd_modify_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u32 attr_mask; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_query_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u32 attr_mask; +}; + +struct pvrdma_cmd_query_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_destroy_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 events_reported; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_bind { + struct pvrdma_cmd_hdr hdr; + u32 mtu; + u32 vlan; + u32 index; + u8 new_gid[16]; + u8 gid_type; + u8 reserved[3]; +}; + +struct pvrdma_cmd_destroy_bind { + struct pvrdma_cmd_hdr hdr; + u32 index; + u8 dest_gid[16]; + u8 reserved[4]; +}; + +union pvrdma_cmd_req { + struct pvrdma_cmd_hdr hdr; + struct pvrdma_cmd_query_port query_port; + struct pvrdma_cmd_query_pkey query_pkey; + struct pvrdma_cmd_create_uc create_uc; + struct pvrdma_cmd_destroy_uc destroy_uc; + struct pvrdma_cmd_create_pd create_pd; + struct pvrdma_cmd_destroy_pd destroy_pd; + struct pvrdma_cmd_create_mr create_mr; + struct pvrdma_cmd_destroy_mr destroy_mr; + struct pvrdma_cmd_create_cq create_cq; + struct pvrdma_cmd_resize_cq resize_cq; + struct pvrdma_cmd_destroy_cq destroy_cq; + struct pvrdma_cmd_create_qp create_qp; + struct pvrdma_cmd_modify_qp modify_qp; + struct pvrdma_cmd_query_qp query_qp; + struct pvrdma_cmd_destroy_qp destroy_qp; + struct pvrdma_cmd_create_bind create_bind; + struct pvrdma_cmd_destroy_bind destroy_bind; +}; + +union pvrdma_cmd_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_cmd_query_port_resp query_port_resp; + struct pvrdma_cmd_query_pkey_resp query_pkey_resp; + struct pvrdma_cmd_create_uc_resp create_uc_resp; + struct pvrdma_cmd_create_pd_resp create_pd_resp; + struct pvrdma_cmd_create_mr_resp create_mr_resp; + struct pvrdma_cmd_create_cq_resp create_cq_resp; + struct pvrdma_cmd_resize_cq_resp resize_cq_resp; + struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_query_qp_resp query_qp_resp; + struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; +}; + +#endif /* __PVRDMA_DEV_API_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c new file mode 100644 index 000000000000..bf51357ea3aa --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/bitmap.h> +#include <linux/errno.h> +#include <linux/slab.h> + +#include "pvrdma.h" + +int pvrdma_uar_table_init(struct pvrdma_dev *dev) +{ + u32 num = dev->dsr->caps.max_uar; + u32 mask = num - 1; + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + + if (!is_power_of_2(num)) + return -EINVAL; + + tbl->last = 0; + tbl->top = 0; + tbl->max = num; + tbl->mask = mask; + spin_lock_init(&tbl->lock); + tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL); + if (!tbl->table) + return -ENOMEM; + + /* 0th UAR is taken by the device. */ + set_bit(0, tbl->table); + + return 0; +} + +void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev) +{ + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + + kfree(tbl->table); +} + +int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) +{ + struct pvrdma_id_table *tbl; + unsigned long flags; + u32 obj; + + tbl = &dev->uar_table.tbl; + + spin_lock_irqsave(&tbl->lock, flags); + obj = find_next_zero_bit(tbl->table, tbl->max, tbl->last); + if (obj >= tbl->max) { + tbl->top = (tbl->top + tbl->max) & tbl->mask; + obj = find_first_zero_bit(tbl->table, tbl->max); + } + + if (obj >= tbl->max) { + spin_unlock_irqrestore(&tbl->lock, flags); + return -ENOMEM; + } + + set_bit(obj, tbl->table); + obj |= tbl->top; + + spin_unlock_irqrestore(&tbl->lock, flags); + + uar->index = obj; + uar->pfn = (pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> + PAGE_SHIFT) + uar->index; + + return 0; +} + +void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) +{ + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + unsigned long flags; + u32 obj; + + obj = uar->index & (tbl->max - 1); + spin_lock_irqsave(&tbl->lock, flags); + clear_bit(obj, tbl->table); + tbl->last = min(tbl->last, obj); + tbl->top = (tbl->top + tbl->max) & tbl->mask; + spin_unlock_irqrestore(&tbl->lock, flags); +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c new file mode 100644 index 000000000000..231a1ce1f4be --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -0,0 +1,1211 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/errno.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include <net/addrconf.h> + +#include "pvrdma.h" + +#define DRV_NAME "vmw_pvrdma" +#define DRV_VERSION "1.0.0.0-k" + +static DEFINE_MUTEX(pvrdma_device_list_lock); +static LIST_HEAD(pvrdma_device_list); +static struct workqueue_struct *event_wq; + +static int pvrdma_add_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); +static int pvrdma_del_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + void **context); + + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", PVRDMA_REV_ID); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *pvrdma_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_board_id +}; + +static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct pvrdma_dev *dev = + container_of(device, struct pvrdma_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d\n", + (int) (dev->dsr->caps.fw_ver >> 32), + (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, + (int) dev->dsr->caps.fw_ver & 0xffff); +} + +static int pvrdma_init_device(struct pvrdma_dev *dev) +{ + /* Initialize some device related stuff */ + spin_lock_init(&dev->cmd_lock); + sema_init(&dev->cmd_sema, 1); + atomic_set(&dev->num_qps, 0); + atomic_set(&dev->num_cqs, 0); + atomic_set(&dev->num_pds, 0); + atomic_set(&dev->num_ahs, 0); + + return 0; +} + +static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = pvrdma_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + return 0; +} + +static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, + u8 port_num) +{ + struct net_device *netdev; + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (port_num != 1) + return NULL; + + rcu_read_lock(); + netdev = dev->netdev; + if (netdev) + dev_hold(netdev); + rcu_read_unlock(); + + return netdev; +} + +static int pvrdma_register_device(struct pvrdma_dev *dev) +{ + int ret = -1; + int i = 0; + + strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.node_guid = dev->dsr->caps.node_guid; + dev->sys_image_guid = dev->dsr->caps.sys_image_guid; + dev->flags = 0; + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.dma_device = &dev->pdev->dev; + dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH); + + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; + + dev->ib_dev.query_device = pvrdma_query_device; + dev->ib_dev.query_port = pvrdma_query_port; + dev->ib_dev.query_gid = pvrdma_query_gid; + dev->ib_dev.query_pkey = pvrdma_query_pkey; + dev->ib_dev.modify_port = pvrdma_modify_port; + dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; + dev->ib_dev.mmap = pvrdma_mmap; + dev->ib_dev.alloc_pd = pvrdma_alloc_pd; + dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; + dev->ib_dev.create_ah = pvrdma_create_ah; + dev->ib_dev.destroy_ah = pvrdma_destroy_ah; + dev->ib_dev.create_qp = pvrdma_create_qp; + dev->ib_dev.modify_qp = pvrdma_modify_qp; + dev->ib_dev.query_qp = pvrdma_query_qp; + dev->ib_dev.destroy_qp = pvrdma_destroy_qp; + dev->ib_dev.post_send = pvrdma_post_send; + dev->ib_dev.post_recv = pvrdma_post_recv; + dev->ib_dev.create_cq = pvrdma_create_cq; + dev->ib_dev.modify_cq = pvrdma_modify_cq; + dev->ib_dev.resize_cq = pvrdma_resize_cq; + dev->ib_dev.destroy_cq = pvrdma_destroy_cq; + dev->ib_dev.poll_cq = pvrdma_poll_cq; + dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; + dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; + dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; + dev->ib_dev.dereg_mr = pvrdma_dereg_mr; + dev->ib_dev.alloc_mr = pvrdma_alloc_mr; + dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; + dev->ib_dev.add_gid = pvrdma_add_gid; + dev->ib_dev.del_gid = pvrdma_del_gid; + dev->ib_dev.get_netdev = pvrdma_get_netdev; + dev->ib_dev.get_port_immutable = pvrdma_port_immutable; + dev->ib_dev.get_link_layer = pvrdma_port_link_layer; + dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + + mutex_init(&dev->port_mutex); + spin_lock_init(&dev->desc_lock); + + dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *), + GFP_KERNEL); + if (!dev->cq_tbl) + return ret; + spin_lock_init(&dev->cq_tbl_lock); + + dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *), + GFP_KERNEL); + if (!dev->qp_tbl) + goto err_cq_free; + spin_lock_init(&dev->qp_tbl_lock); + + ret = ib_register_device(&dev->ib_dev, NULL); + if (ret) + goto err_qp_free; + + for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { + ret = device_create_file(&dev->ib_dev.dev, + pvrdma_class_attributes[i]); + if (ret) + goto err_class; + } + + dev->ib_active = true; + + return 0; + +err_class: + ib_unregister_device(&dev->ib_dev); +err_qp_free: + kfree(dev->qp_tbl); +err_cq_free: + kfree(dev->cq_tbl); + + return ret; +} + +static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) +{ + u32 icr = PVRDMA_INTR_CAUSE_RESPONSE; + struct pvrdma_dev *dev = dev_id; + + dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); + + if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) { + /* Legacy intr */ + icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); + if (icr == 0) + return IRQ_NONE; + } + + if (icr == PVRDMA_INTR_CAUSE_RESPONSE) + complete(&dev->cmd_done); + + return IRQ_HANDLED; +} + +static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) +{ + struct pvrdma_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; + if (qp) + atomic_inc(&qp->refcnt); + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + if (qp && qp->ibqp.event_handler) { + struct ib_qp *ibqp = &qp->ibqp; + struct ib_event e; + + e.device = ibqp->device; + e.element.qp = ibqp; + e.event = type; /* 1:1 mapping for now. */ + ibqp->event_handler(&e, ibqp->qp_context); + } + if (qp) { + atomic_dec(&qp->refcnt); + if (atomic_read(&qp->refcnt) == 0) + wake_up(&qp->wait); + } +} + +static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) +{ + struct pvrdma_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; + if (cq) + atomic_inc(&cq->refcnt); + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (cq && cq->ibcq.event_handler) { + struct ib_cq *ibcq = &cq->ibcq; + struct ib_event e; + + e.device = ibcq->device; + e.element.cq = ibcq; + e.event = type; /* 1:1 mapping for now. */ + ibcq->event_handler(&e, ibcq->cq_context); + } + if (cq) { + atomic_dec(&cq->refcnt); + if (atomic_read(&cq->refcnt) == 0) + wake_up(&cq->wait); + } +} + +static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, + enum ib_event_type event) +{ + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + ib_event.device = &dev->ib_dev; + ib_event.element.port_num = port; + ib_event.event = event; + ib_dispatch_event(&ib_event); +} + +static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) +{ + if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { + dev_warn(&dev->pdev->dev, "event on port %d\n", port); + return; + } + + pvrdma_dispatch_event(dev, port, type); +} + +static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i) +{ + return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr( + &dev->async_pdir, + PAGE_SIZE + + sizeof(struct pvrdma_eqe) * i); +} + +static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) +{ + struct pvrdma_dev *dev = dev_id; + struct pvrdma_ring *ring = &dev->async_ring_state->rx; + int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) * + PAGE_SIZE / sizeof(struct pvrdma_eqe); + unsigned int head; + + dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n"); + + /* + * Don't process events until the IB device is registered. Otherwise + * we'll try to ib_dispatch_event() on an invalid device. + */ + if (!dev->ib_active) + return IRQ_HANDLED; + + while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { + struct pvrdma_eqe *eqe; + + eqe = get_eqe(dev, head); + + switch (eqe->type) { + case PVRDMA_EVENT_QP_FATAL: + case PVRDMA_EVENT_QP_REQ_ERR: + case PVRDMA_EVENT_QP_ACCESS_ERR: + case PVRDMA_EVENT_COMM_EST: + case PVRDMA_EVENT_SQ_DRAINED: + case PVRDMA_EVENT_PATH_MIG: + case PVRDMA_EVENT_PATH_MIG_ERR: + case PVRDMA_EVENT_QP_LAST_WQE_REACHED: + pvrdma_qp_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_CQ_ERR: + pvrdma_cq_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_SRQ_ERR: + case PVRDMA_EVENT_SRQ_LIMIT_REACHED: + break; + + case PVRDMA_EVENT_PORT_ACTIVE: + case PVRDMA_EVENT_PORT_ERR: + case PVRDMA_EVENT_LID_CHANGE: + case PVRDMA_EVENT_PKEY_CHANGE: + case PVRDMA_EVENT_SM_CHANGE: + case PVRDMA_EVENT_CLIENT_REREGISTER: + case PVRDMA_EVENT_GID_CHANGE: + pvrdma_dev_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_DEVICE_FATAL: + pvrdma_dev_event(dev, 1, eqe->type); + break; + + default: + break; + } + + pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); + } + + return IRQ_HANDLED; +} + +static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev, + unsigned int i) +{ + return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr( + &dev->cq_pdir, + PAGE_SIZE + + sizeof(struct pvrdma_cqne) * i); +} + +static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) +{ + struct pvrdma_dev *dev = dev_id; + struct pvrdma_ring *ring = &dev->cq_ring_state->rx; + int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / + sizeof(struct pvrdma_cqne); + unsigned int head; + unsigned long flags; + + dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); + + while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { + struct pvrdma_cqne *cqne; + struct pvrdma_cq *cq; + + cqne = get_cqne(dev, head); + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; + if (cq) + atomic_inc(&cq->refcnt); + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (cq && cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + if (cq) { + atomic_dec(&cq->refcnt); + if (atomic_read(&cq->refcnt)) + wake_up(&cq->wait); + } + pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); + } + + return IRQ_HANDLED; +} + +static void pvrdma_disable_msi_all(struct pvrdma_dev *dev) +{ + if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) + pci_disable_msix(dev->pdev); + else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI) + pci_disable_msi(dev->pdev); +} + +static void pvrdma_free_irq(struct pvrdma_dev *dev) +{ + int i; + + dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); + + if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) { + for (i = 0; i < dev->intr.size; i++) { + if (dev->intr.enabled[i]) { + free_irq(dev->intr.msix_entry[i].vector, dev); + dev->intr.enabled[i] = 0; + } + } + } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX || + dev->intr.type == PVRDMA_INTR_TYPE_MSI) { + free_irq(dev->pdev->irq, dev); + } +} + +static void pvrdma_enable_intrs(struct pvrdma_dev *dev) +{ + dev_dbg(&dev->pdev->dev, "enable interrupts\n"); + pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0); +} + +static void pvrdma_disable_intrs(struct pvrdma_dev *dev) +{ + dev_dbg(&dev->pdev->dev, "disable interrupts\n"); + pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); +} + +static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev) +{ + int i; + int ret; + + for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) { + dev->intr.msix_entry[i].entry = i; + dev->intr.msix_entry[i].vector = i; + + switch (i) { + case 0: + /* CMD ring handler */ + dev->intr.handler[i] = pvrdma_intr0_handler; + break; + case 1: + /* Async event ring handler */ + dev->intr.handler[i] = pvrdma_intr1_handler; + break; + default: + /* Completion queue handler */ + dev->intr.handler[i] = pvrdma_intrx_handler; + break; + } + } + + ret = pci_enable_msix(pdev, dev->intr.msix_entry, + PVRDMA_MAX_INTERRUPTS); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSIX; + dev->intr.size = PVRDMA_MAX_INTERRUPTS; + } else if (ret > 0) { + ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSIX; + dev->intr.size = ret; + } else { + dev->intr.size = 0; + } + } + + dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n", + dev->intr.type, dev->intr.size); + + return ret; +} + +static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) +{ + int ret = 0; + int i; + + if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) && + pvrdma_enable_msix(dev->pdev, dev)) { + /* Try MSI */ + ret = pci_enable_msi(dev->pdev); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSI; + } else { + /* Legacy INTR */ + dev->intr.type = PVRDMA_INTR_TYPE_INTX; + } + } + + /* Request First IRQ */ + switch (dev->intr.type) { + case PVRDMA_INTR_TYPE_INTX: + case PVRDMA_INTR_TYPE_MSI: + ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler, + IRQF_SHARED, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt\n"); + goto disable_msi; + } + break; + case PVRDMA_INTR_TYPE_MSIX: + ret = request_irq(dev->intr.msix_entry[0].vector, + pvrdma_intr0_handler, 0, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt 0\n"); + goto disable_msi; + } + dev->intr.enabled[0] = 1; + break; + default: + /* Not reached */ + break; + } + + /* For MSIX: request intr for each vector */ + if (dev->intr.size > 1) { + ret = request_irq(dev->intr.msix_entry[1].vector, + pvrdma_intr1_handler, 0, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt 1\n"); + goto free_irq; + } + dev->intr.enabled[1] = 1; + + for (i = 2; i < dev->intr.size; i++) { + ret = request_irq(dev->intr.msix_entry[i].vector, + pvrdma_intrx_handler, 0, + DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt %d\n", i); + goto free_irq; + } + dev->intr.enabled[i] = 1; + } + } + + return 0; + +free_irq: + pvrdma_free_irq(dev); +disable_msi: + pvrdma_disable_msi_all(dev); + return ret; +} + +static void pvrdma_free_slots(struct pvrdma_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + if (dev->resp_slot) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot, + dev->dsr->resp_slot_dma); + if (dev->cmd_slot) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot, + dev->dsr->cmd_slot_dma); +} + +static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, + const union ib_gid *gid, + int index) +{ + int ret; + union pvrdma_cmd_req req; + struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind; + + if (!dev->sgid_tbl) { + dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); + return -EINVAL; + } + + memset(cmd_bind, 0, sizeof(*cmd_bind)); + cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND; + memcpy(cmd_bind->new_gid, gid->raw, 16); + cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); + cmd_bind->vlan = 0xfff; + cmd_bind->index = index; + cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create binding, error: %d\n", ret); + return -EFAULT; + } + memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid)); + return 0; +} + +static int pvrdma_add_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + return pvrdma_add_gid_at_index(dev, gid, index); +} + +static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) +{ + int ret; + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind; + + /* Update sgid table. */ + if (!dev->sgid_tbl) { + dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); + return -EINVAL; + } + + memset(cmd_dest, 0, sizeof(*cmd_dest)); + cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND; + memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16); + cmd_dest->index = index; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not destroy binding, error: %d\n", ret); + return ret; + } + memset(&dev->sgid_tbl[index], 0, 16); + return 0; +} + +static int pvrdma_del_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + void **context) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", + index, dev->netdev->name); + + return pvrdma_del_gid_at_index(dev, index); +} + +static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + unsigned long event) +{ + switch (event) { + case NETDEV_REBOOT: + case NETDEV_DOWN: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); + break; + case NETDEV_UP: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; + default: + dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", + event, dev->ib_dev.name); + break; + } +} + +static void pvrdma_netdevice_event_work(struct work_struct *work) +{ + struct pvrdma_netdevice_work *netdev_work; + struct pvrdma_dev *dev; + + netdev_work = container_of(work, struct pvrdma_netdevice_work, work); + + mutex_lock(&pvrdma_device_list_lock); + list_for_each_entry(dev, &pvrdma_device_list, device_link) { + if (dev->netdev == netdev_work->event_netdev) { + pvrdma_netdevice_event_handle(dev, netdev_work->event); + break; + } + } + mutex_unlock(&pvrdma_device_list_lock); + + kfree(netdev_work); +} + +static int pvrdma_netdevice_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr); + struct pvrdma_netdevice_work *netdev_work; + + netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC); + if (!netdev_work) + return NOTIFY_BAD; + + INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work); + netdev_work->event_netdev = event_netdev; + netdev_work->event = event; + queue_work(event_wq, &netdev_work->work); + + return NOTIFY_DONE; +} + +static int pvrdma_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pci_dev *pdev_net; + struct pvrdma_dev *dev; + int ret; + unsigned long start; + unsigned long len; + unsigned int version; + dma_addr_t slot_dma = 0; + + dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); + + /* Allocate zero-out device */ + dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) { + dev_err(&pdev->dev, "failed to allocate IB device\n"); + return -ENOMEM; + } + + mutex_lock(&pvrdma_device_list_lock); + list_add(&dev->device_link, &pvrdma_device_list); + mutex_unlock(&pvrdma_device_list_lock); + + ret = pvrdma_init_device(dev); + if (ret) + goto err_free_device; + + dev->pdev = pdev; + pci_set_drvdata(pdev, dev); + + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "cannot enable PCI device\n"); + goto err_free_device; + } + + dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n", + pci_resource_flags(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource len %#llx\n", + (unsigned long long)pci_resource_len(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource start %#llx\n", + (unsigned long long)pci_resource_start(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n", + pci_resource_flags(pdev, 1)); + dev_dbg(&pdev->dev, "PCI resource len %#llx\n", + (unsigned long long)pci_resource_len(pdev, 1)); + dev_dbg(&pdev->dev, "PCI resource start %#llx\n", + (unsigned long long)pci_resource_start(pdev, 1)); + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || + !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); + ret = -ENOMEM; + goto err_free_device; + } + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "cannot request PCI resources\n"); + goto err_disable_pdev; + } + + /* Enable 64-Bit DMA */ + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret != 0) { + dev_err(&pdev->dev, + "pci_set_consistent_dma_mask failed\n"); + goto err_free_resource; + } + } else { + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret != 0) { + dev_err(&pdev->dev, + "pci_set_dma_mask failed\n"); + goto err_free_resource; + } + } + + pci_set_master(pdev); + + /* Map register space */ + start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG); + len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG); + dev->regs = ioremap(start, len); + if (!dev->regs) { + dev_err(&pdev->dev, "register mapping failed\n"); + ret = -ENOMEM; + goto err_free_resource; + } + + /* Setup per-device UAR. */ + dev->driver_uar.index = 0; + dev->driver_uar.pfn = + pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> + PAGE_SHIFT; + dev->driver_uar.map = + ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!dev->driver_uar.map) { + dev_err(&pdev->dev, "failed to remap UAR pages\n"); + ret = -ENOMEM; + goto err_unmap_regs; + } + + version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); + dev_info(&pdev->dev, "device version %d, driver version %d\n", + version, PVRDMA_VERSION); + if (version < PVRDMA_VERSION) { + dev_err(&pdev->dev, "incompatible device version\n"); + goto err_uar_unmap; + } + + dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), + &dev->dsrbase, GFP_KERNEL); + if (!dev->dsr) { + dev_err(&pdev->dev, "failed to allocate shared region\n"); + ret = -ENOMEM; + goto err_uar_unmap; + } + + /* Setup the shared region */ + memset(dev->dsr, 0, sizeof(*dev->dsr)); + dev->dsr->driver_version = PVRDMA_VERSION; + dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? + PVRDMA_GOS_BITS_32 : + PVRDMA_GOS_BITS_64; + dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; + dev->dsr->gos_info.gos_ver = 1; + dev->dsr->uar_pfn = dev->driver_uar.pfn; + + /* Command slot. */ + dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, + &slot_dma, GFP_KERNEL); + if (!dev->cmd_slot) { + ret = -ENOMEM; + goto err_free_dsr; + } + + dev->dsr->cmd_slot_dma = (u64)slot_dma; + + /* Response slot. */ + dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, + &slot_dma, GFP_KERNEL); + if (!dev->resp_slot) { + ret = -ENOMEM; + goto err_free_slots; + } + + dev->dsr->resp_slot_dma = (u64)slot_dma; + + /* Async event ring */ + dev->dsr->async_ring_pages.num_pages = 4; + ret = pvrdma_page_dir_init(dev, &dev->async_pdir, + dev->dsr->async_ring_pages.num_pages, true); + if (ret) + goto err_free_slots; + dev->async_ring_state = dev->async_pdir.pages[0]; + dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; + + /* CQ notification ring */ + dev->dsr->cq_ring_pages.num_pages = 4; + ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, + dev->dsr->cq_ring_pages.num_pages, true); + if (ret) + goto err_free_async_ring; + dev->cq_ring_state = dev->cq_pdir.pages[0]; + dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma; + + /* + * Write the PA of the shared region to the device. The writes must be + * ordered such that the high bits are written last. When the writes + * complete, the device will have filled out the capabilities. + */ + + pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase); + pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH, + (u32)((u64)(dev->dsrbase) >> 32)); + + /* Make sure the write is complete before reading status. */ + mb(); + + /* Currently, the driver only supports RoCE mode. */ + if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) { + dev_err(&pdev->dev, "unsupported transport %d\n", + dev->dsr->caps.mode); + ret = -EFAULT; + goto err_free_cq_ring; + } + + /* Currently, the driver only supports RoCE V1. */ + if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) { + dev_err(&pdev->dev, "driver needs RoCE v1 support\n"); + ret = -EFAULT; + goto err_free_cq_ring; + } + + /* Paired vmxnet3 will have same bus, slot. But func will be 0 */ + pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + if (!pdev_net) { + dev_err(&pdev->dev, "failed to find paired net device\n"); + ret = -ENODEV; + goto err_free_cq_ring; + } + + if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE || + pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) { + dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n"); + pci_dev_put(pdev_net); + ret = -ENODEV; + goto err_free_cq_ring; + } + + dev->netdev = pci_get_drvdata(pdev_net); + pci_dev_put(pdev_net); + if (!dev->netdev) { + dev_err(&pdev->dev, "failed to get vmxnet3 device\n"); + ret = -ENODEV; + goto err_free_cq_ring; + } + + dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); + + /* Interrupt setup */ + ret = pvrdma_alloc_intrs(dev); + if (ret) { + dev_err(&pdev->dev, "failed to allocate interrupts\n"); + ret = -ENOMEM; + goto err_netdevice; + } + + /* Allocate UAR table. */ + ret = pvrdma_uar_table_init(dev); + if (ret) { + dev_err(&pdev->dev, "failed to allocate UAR table\n"); + ret = -ENOMEM; + goto err_free_intrs; + } + + /* Allocate GID table */ + dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len, + sizeof(union ib_gid), GFP_KERNEL); + if (!dev->sgid_tbl) { + ret = -ENOMEM; + goto err_free_uar_table; + } + dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len); + + pvrdma_enable_intrs(dev); + + /* Activate pvrdma device */ + pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE); + + /* Make sure the write is complete before reading status. */ + mb(); + + /* Check if device was successfully activated */ + ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR); + if (ret != 0) { + dev_err(&pdev->dev, "failed to activate device\n"); + ret = -EFAULT; + goto err_disable_intr; + } + + /* Register IB device */ + ret = pvrdma_register_device(dev); + if (ret) { + dev_err(&pdev->dev, "failed to register IB device\n"); + goto err_disable_intr; + } + + dev->nb_netdev.notifier_call = pvrdma_netdevice_event; + ret = register_netdevice_notifier(&dev->nb_netdev); + if (ret) { + dev_err(&pdev->dev, "failed to register netdevice events\n"); + goto err_unreg_ibdev; + } + + dev_info(&pdev->dev, "attached to device\n"); + return 0; + +err_unreg_ibdev: + ib_unregister_device(&dev->ib_dev); +err_disable_intr: + pvrdma_disable_intrs(dev); + kfree(dev->sgid_tbl); +err_free_uar_table: + pvrdma_uar_table_cleanup(dev); +err_free_intrs: + pvrdma_free_irq(dev); + pvrdma_disable_msi_all(dev); +err_netdevice: + unregister_netdevice_notifier(&dev->nb_netdev); +err_free_cq_ring: + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); +err_free_async_ring: + pvrdma_page_dir_cleanup(dev, &dev->async_pdir); +err_free_slots: + pvrdma_free_slots(dev); +err_free_dsr: + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); +err_uar_unmap: + iounmap(dev->driver_uar.map); +err_unmap_regs: + iounmap(dev->regs); +err_free_resource: + pci_release_regions(pdev); +err_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +err_free_device: + mutex_lock(&pvrdma_device_list_lock); + list_del(&dev->device_link); + mutex_unlock(&pvrdma_device_list_lock); + ib_dealloc_device(&dev->ib_dev); + return ret; +} + +static void pvrdma_pci_remove(struct pci_dev *pdev) +{ + struct pvrdma_dev *dev = pci_get_drvdata(pdev); + + if (!dev) + return; + + dev_info(&pdev->dev, "detaching from device\n"); + + unregister_netdevice_notifier(&dev->nb_netdev); + dev->nb_netdev.notifier_call = NULL; + + flush_workqueue(event_wq); + + /* Unregister ib device */ + ib_unregister_device(&dev->ib_dev); + + mutex_lock(&pvrdma_device_list_lock); + list_del(&dev->device_link); + mutex_unlock(&pvrdma_device_list_lock); + + pvrdma_disable_intrs(dev); + pvrdma_free_irq(dev); + pvrdma_disable_msi_all(dev); + + /* Deactivate pvrdma device */ + pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); + pvrdma_page_dir_cleanup(dev, &dev->async_pdir); + pvrdma_free_slots(dev); + + iounmap(dev->regs); + kfree(dev->sgid_tbl); + kfree(dev->cq_tbl); + kfree(dev->qp_tbl); + pvrdma_uar_table_cleanup(dev); + iounmap(dev->driver_uar.map); + + ib_dealloc_device(&dev->ib_dev); + + /* Free pci resources */ + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_device_id pvrdma_pci_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, pvrdma_pci_table); + +static struct pci_driver pvrdma_driver = { + .name = DRV_NAME, + .id_table = pvrdma_pci_table, + .probe = pvrdma_pci_probe, + .remove = pvrdma_pci_remove, +}; + +static int __init pvrdma_init(void) +{ + int err; + + event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM); + if (!event_wq) + return -ENOMEM; + + err = pci_register_driver(&pvrdma_driver); + if (err) + destroy_workqueue(event_wq); + + return err; +} + +static void __exit pvrdma_cleanup(void) +{ + pci_unregister_driver(&pvrdma_driver); + + destroy_workqueue(event_wq); +} + +module_init(pvrdma_init); +module_exit(pvrdma_cleanup); + +MODULE_AUTHOR("VMware, Inc"); +MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c new file mode 100644 index 000000000000..948b5ccd2a70 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/bitmap.h> + +#include "pvrdma.h" + +int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir, + u64 npages, bool alloc_pages) +{ + u64 i; + + if (npages > PVRDMA_PAGE_DIR_MAX_PAGES) + return -EINVAL; + + memset(pdir, 0, sizeof(*pdir)); + + pdir->dir = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &pdir->dir_dma, GFP_KERNEL); + if (!pdir->dir) + goto err; + + pdir->ntables = PVRDMA_PAGE_DIR_TABLE(npages - 1) + 1; + pdir->tables = kcalloc(pdir->ntables, sizeof(*pdir->tables), + GFP_KERNEL); + if (!pdir->tables) + goto err; + + for (i = 0; i < pdir->ntables; i++) { + pdir->tables[i] = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + (dma_addr_t *)&pdir->dir[i], + GFP_KERNEL); + if (!pdir->tables[i]) + goto err; + } + + pdir->npages = npages; + + if (alloc_pages) { + pdir->pages = kcalloc(npages, sizeof(*pdir->pages), + GFP_KERNEL); + if (!pdir->pages) + goto err; + + for (i = 0; i < pdir->npages; i++) { + dma_addr_t page_dma; + + pdir->pages[i] = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, + &page_dma, + GFP_KERNEL); + if (!pdir->pages[i]) + goto err; + + pvrdma_page_dir_insert_dma(pdir, i, page_dma); + } + } + + return 0; + +err: + pvrdma_page_dir_cleanup(dev, pdir); + + return -ENOMEM; +} + +static u64 *pvrdma_page_dir_table(struct pvrdma_page_dir *pdir, u64 idx) +{ + return pdir->tables[PVRDMA_PAGE_DIR_TABLE(idx)]; +} + +dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx) +{ + return pvrdma_page_dir_table(pdir, idx)[PVRDMA_PAGE_DIR_PAGE(idx)]; +} + +static void pvrdma_page_dir_cleanup_pages(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->pages) { + u64 i; + + for (i = 0; i < pdir->npages && pdir->pages[i]; i++) { + dma_addr_t page_dma = pvrdma_page_dir_get_dma(pdir, i); + + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + pdir->pages[i], page_dma); + } + + kfree(pdir->pages); + } +} + +static void pvrdma_page_dir_cleanup_tables(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->tables) { + int i; + + pvrdma_page_dir_cleanup_pages(dev, pdir); + + for (i = 0; i < pdir->ntables; i++) { + u64 *table = pdir->tables[i]; + + if (table) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + table, pdir->dir[i]); + } + + kfree(pdir->tables); + } +} + +void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->dir) { + pvrdma_page_dir_cleanup_tables(dev, pdir); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + pdir->dir, pdir->dir_dma); + } +} + +int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, + dma_addr_t daddr) +{ + u64 *table; + + if (idx >= pdir->npages) + return -EINVAL; + + table = pvrdma_page_dir_table(pdir, idx); + table[PVRDMA_PAGE_DIR_PAGE(idx)] = daddr; + + return 0; +} + +int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, + struct ib_umem *umem, u64 offset) +{ + u64 i = offset; + int j, entry; + int ret = 0, len = 0; + struct scatterlist *sg; + + if (offset >= pdir->npages) + return -EINVAL; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (j = 0; j < len; j++) { + dma_addr_t addr = sg_dma_address(sg) + + umem->page_size * j; + + ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + if (ret) + goto exit; + + i++; + } + } + +exit: + return ret; +} + +int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir, + u64 *page_list, + int num_pages) +{ + int i; + int ret; + + if (num_pages > pdir->npages) + return -EINVAL; + + for (i = 0; i < num_pages; i++) { + ret = pvrdma_page_dir_insert_dma(pdir, i, page_list[i]); + if (ret) + return ret; + } + + return 0; +} + +void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src) +{ + dst->max_send_wr = src->max_send_wr; + dst->max_recv_wr = src->max_recv_wr; + dst->max_send_sge = src->max_send_sge; + dst->max_recv_sge = src->max_recv_sge; + dst->max_inline_data = src->max_inline_data; +} + +void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, const struct ib_qp_cap *src) +{ + dst->max_send_wr = src->max_send_wr; + dst->max_recv_wr = src->max_recv_wr; + dst->max_send_sge = src->max_send_sge; + dst->max_recv_sge = src->max_recv_sge; + dst->max_inline_data = src->max_inline_data; +} + +void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src) +{ + BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid)); + memcpy(dst, src, sizeof(*src)); +} + +void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src) +{ + BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid)); + memcpy(dst, src, sizeof(*src)); +} + +void pvrdma_global_route_to_ib(struct ib_global_route *dst, + const struct pvrdma_global_route *src) +{ + pvrdma_gid_to_ib(&dst->dgid, &src->dgid); + dst->flow_label = src->flow_label; + dst->sgid_index = src->sgid_index; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; +} + +void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst, + const struct ib_global_route *src) +{ + ib_gid_to_pvrdma(&dst->dgid, &src->dgid); + dst->flow_label = src->flow_label; + dst->sgid_index = src->sgid_index; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; +} + +void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst, + const struct pvrdma_ah_attr *src) +{ + pvrdma_global_route_to_ib(&dst->grh, &src->grh); + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->ah_flags = src->ah_flags; + dst->port_num = src->port_num; + memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac)); +} + +void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, + const struct ib_ah_attr *src) +{ + ib_global_route_to_pvrdma(&dst->grh, &src->grh); + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->ah_flags = src->ah_flags; + dst->port_num = src->port_num; + memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac)); +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c new file mode 100644 index 000000000000..8519f3212e52 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> +#include <linux/slab.h> + +#include "pvrdma.h" + +/** + * pvrdma_get_dma_mr - get a DMA memory region + * @pd: protection domain + * @acc: access flags + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int ret; + + /* Support only LOCAL_WRITE flag for DMA MRs */ + if (acc & ~IB_ACCESS_LOCAL_WRITE) { + dev_warn(&dev->pdev->dev, + "unsupported dma mr access flags %#x\n", acc); + return ERR_PTR(-EOPNOTSUPP); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = acc; + cmd->flags = PVRDMA_MR_FLAG_DMA; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not get DMA mem region, error: %d\n", ret); + kfree(mr); + return ERR_PTR(ret); + } + + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + + return &mr->ibmr; +} + +/** + * pvrdma_reg_user_mr - register a userspace memory region + * @pd: protection domain + * @start: starting address + * @length: length of region + * @virt_addr: I/O virtual address + * @access_flags: access flags for memory region + * @udata: user data + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr = NULL; + struct ib_umem *umem; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int nchunks; + int ret; + int entry; + struct scatterlist *sg; + + if (length == 0 || length > dev->dsr->caps.max_mr_size) { + dev_warn(&dev->pdev->dev, "invalid mem region length\n"); + return ERR_PTR(-EINVAL); + } + + umem = ib_umem_get(pd->uobject->context, start, + length, access_flags, 0); + if (IS_ERR(umem)) { + dev_warn(&dev->pdev->dev, + "could not get umem for mem region\n"); + return ERR_CAST(umem); + } + + nchunks = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) + nchunks += sg_dma_len(sg) >> PAGE_SHIFT; + + if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", + nchunks); + ret = -EINVAL; + goto err_umem; + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = -ENOMEM; + goto err_umem; + } + + mr->mmr.iova = virt_addr; + mr->mmr.size = length; + mr->umem = umem; + + ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + ret = pvrdma_page_dir_insert_umem(&mr->pdir, mr->umem, 0); + if (ret) + goto err_pdir; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->start = start; + cmd->length = length; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = access_flags; + cmd->nchunks = nchunks; + cmd->pdir_dma = mr->pdir.dir_dma; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not register mem region, error: %d\n", ret); + goto err_pdir; + } + + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + + return &mr->ibmr; + +err_pdir: + pvrdma_page_dir_cleanup(dev, &mr->pdir); +err_umem: + ib_umem_release(umem); + kfree(mr); + + return ERR_PTR(ret); +} + +/** + * pvrdma_alloc_mr - allocate a memory region + * @pd: protection domain + * @mr_type: type of memory region + * @max_num_sg: maximum number of pages + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int size = max_num_sg * sizeof(u64); + int ret; + + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > PVRDMA_MAX_FAST_REG_PAGES) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->pages = kzalloc(size, GFP_KERNEL); + if (!mr->pages) { + ret = -ENOMEM; + goto freemr; + } + + ret = pvrdma_page_dir_init(dev, &mr->pdir, max_num_sg, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "failed to allocate page dir for mr\n"); + ret = -ENOMEM; + goto freepages; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = 0; + cmd->flags = PVRDMA_MR_FLAG_FRMR; + cmd->nchunks = max_num_sg; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create FR mem region, error: %d\n", ret); + goto freepdir; + } + + mr->max_pages = max_num_sg; + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + mr->page_shift = PAGE_SHIFT; + mr->umem = NULL; + + return &mr->ibmr; + +freepdir: + pvrdma_page_dir_cleanup(dev, &mr->pdir); +freepages: + kfree(mr->pages); +freemr: + kfree(mr); + return ERR_PTR(ret); +} + +/** + * pvrdma_dereg_mr - deregister a memory region + * @ibmr: memory region + * + * @return: 0 on success. + */ +int pvrdma_dereg_mr(struct ib_mr *ibmr) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + struct pvrdma_dev *dev = to_vdev(ibmr->device); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_mr *cmd = &req.destroy_mr; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_MR; + cmd->mr_handle = mr->mmr.mr_handle; + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "could not deregister mem region, error: %d\n", ret); + + pvrdma_page_dir_cleanup(dev, &mr->pdir); + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr->pages); + kfree(mr); + + return 0; +} + +static int pvrdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + + if (mr->npages == mr->max_pages) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + return 0; +} + +int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + struct pvrdma_dev *dev = to_vdev(ibmr->device); + int ret; + + mr->npages = 0; + + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, pvrdma_set_page); + if (ret < 0) + dev_warn(&dev->pdev->dev, "could not map sg to pages\n"); + + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c new file mode 100644 index 000000000000..c8c01e558125 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "pvrdma.h" + +static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, + struct pvrdma_cq **recv_cq) +{ + *send_cq = to_vcq(qp->ibqp.send_cq); + *recv_cq = to_vcq(qp->ibqp.recv_cq); +} + +static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq, + unsigned long *scq_flags, + unsigned long *rcq_flags) + __acquires(scq->cq_lock) __acquires(rcq->cq_lock) +{ + if (scq == rcq) { + spin_lock_irqsave(&scq->cq_lock, *scq_flags); + __acquire(rcq->cq_lock); + } else if (scq->cq_handle < rcq->cq_handle) { + spin_lock_irqsave(&scq->cq_lock, *scq_flags); + spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags, + SINGLE_DEPTH_NESTING); + } else { + spin_lock_irqsave(&rcq->cq_lock, *rcq_flags); + spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags, + SINGLE_DEPTH_NESTING); + } +} + +static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq, + unsigned long *scq_flags, + unsigned long *rcq_flags) + __releases(scq->cq_lock) __releases(rcq->cq_lock) +{ + if (scq == rcq) { + __release(rcq->cq_lock); + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + } else if (scq->cq_handle < rcq->cq_handle) { + spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags); + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + } else { + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags); + } +} + +static void pvrdma_reset_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq, *rcq; + unsigned long scq_flags, rcq_flags; + + /* Clean up cqes */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + /* + * Reset queuepair. The checks are because usermode queuepairs won't + * have kernel ringstates. + */ + if (qp->rq.ring) { + atomic_set(&qp->rq.ring->cons_head, 0); + atomic_set(&qp->rq.ring->prod_tail, 0); + } + if (qp->sq.ring) { + atomic_set(&qp->sq.ring->cons_head, 0); + atomic_set(&qp->sq.ring->prod_tail, 0); + } +} + +static int pvrdma_set_rq_size(struct pvrdma_dev *dev, + struct ib_qp_cap *req_cap, + struct pvrdma_qp *qp) +{ + if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr || + req_cap->max_recv_sge > dev->dsr->caps.max_sge) { + dev_warn(&dev->pdev->dev, "recv queue size invalid\n"); + return -EINVAL; + } + + qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr)); + qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge)); + + /* Write back */ + req_cap->max_recv_wr = qp->rq.wqe_cnt; + req_cap->max_recv_sge = qp->rq.max_sg; + + qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * + qp->rq.max_sg); + qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) / + PAGE_SIZE; + + return 0; +} + +static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap, + enum ib_qp_type type, struct pvrdma_qp *qp) +{ + if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr || + req_cap->max_send_sge > dev->dsr->caps.max_sge) { + dev_warn(&dev->pdev->dev, "send queue size invalid\n"); + return -EINVAL; + } + + qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr)); + qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge)); + + /* Write back */ + req_cap->max_send_wr = qp->sq.wqe_cnt; + req_cap->max_send_sge = qp->sq.max_sg; + + qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) + + sizeof(struct pvrdma_sge) * + qp->sq.max_sg); + /* Note: one extra page for the header. */ + qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size + + PAGE_SIZE - 1) / PAGE_SIZE; + + return 0; +} + +/** + * pvrdma_create_qp - create queue pair + * @pd: protection domain + * @init_attr: queue pair attributes + * @udata: user data + * + * @return: the ib_qp pointer on success, otherwise returns an errno. + */ +struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct pvrdma_qp *qp = NULL; + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_qp *cmd = &req.create_qp; + struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_create_qp ucmd; + unsigned long flags; + int ret; + + if (init_attr->create_flags) { + dev_warn(&dev->pdev->dev, + "invalid create queuepair flags %#x\n", + init_attr->create_flags); + return ERR_PTR(-EINVAL); + } + + if (init_attr->qp_type != IB_QPT_RC && + init_attr->qp_type != IB_QPT_UD && + init_attr->qp_type != IB_QPT_GSI) { + dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n", + init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp)) + return ERR_PTR(-ENOMEM); + + switch (init_attr->qp_type) { + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > pd->device->phys_port_cnt || + udata) { + dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n"); + ret = -EINVAL; + goto err_qp; + } + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UD: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + ret = -ENOMEM; + goto err_qp; + } + + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + mutex_init(&qp->mutex); + atomic_set(&qp->refcnt, 1); + init_waitqueue_head(&qp->wait); + + qp->state = IB_QPS_RESET; + + if (pd->uobject && udata) { + dev_dbg(&dev->pdev->dev, + "create queuepair from user space\n"); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_qp; + } + + /* set qp->sq.wqe_cnt, shift, buf_size.. */ + qp->rumem = ib_umem_get(pd->uobject->context, + ucmd.rbuf_addr, + ucmd.rbuf_size, 0, 0); + if (IS_ERR(qp->rumem)) { + ret = PTR_ERR(qp->rumem); + goto err_qp; + } + + qp->sumem = ib_umem_get(pd->uobject->context, + ucmd.sbuf_addr, + ucmd.sbuf_size, 0, 0); + if (IS_ERR(qp->sumem)) { + ib_umem_release(qp->rumem); + ret = PTR_ERR(qp->sumem); + goto err_qp; + } + + qp->npages_send = ib_umem_page_count(qp->sumem); + qp->npages_recv = ib_umem_page_count(qp->rumem); + qp->npages = qp->npages_send + qp->npages_recv; + } else { + qp->is_kernel = true; + + ret = pvrdma_set_sq_size(to_vdev(pd->device), + &init_attr->cap, + init_attr->qp_type, qp); + if (ret) + goto err_qp; + + ret = pvrdma_set_rq_size(to_vdev(pd->device), + &init_attr->cap, qp); + if (ret) + goto err_qp; + + qp->npages = qp->npages_send + qp->npages_recv; + + /* Skip header page. */ + qp->sq.offset = PAGE_SIZE; + + /* Recv queue pages are after send pages. */ + qp->rq.offset = qp->npages_send * PAGE_SIZE; + } + + if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in queuepair\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages, + qp->is_kernel); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + if (!qp->is_kernel) { + pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0); + pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem, + qp->npages_send); + } else { + /* Ring state is always the first page. */ + qp->sq.ring = qp->pdir.pages[0]; + qp->rq.ring = &qp->sq.ring[1]; + } + break; + default: + ret = -EINVAL; + goto err_qp; + } + + /* Not supported */ + init_attr->cap.max_inline_data = 0; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle; + cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle; + cmd->max_send_wr = init_attr->cap.max_send_wr; + cmd->max_recv_wr = init_attr->cap.max_recv_wr; + cmd->max_send_sge = init_attr->cap.max_send_sge; + cmd->max_recv_sge = init_attr->cap.max_recv_sge; + cmd->max_inline_data = init_attr->cap.max_inline_data; + cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; + cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); + cmd->access_flags = IB_ACCESS_LOCAL_WRITE; + cmd->total_chunks = qp->npages; + cmd->send_chunks = qp->npages_send - 1; + cmd->pdir_dma = qp->pdir.dir_dma; + + dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n", + cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge, + cmd->max_recv_sge); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create queuepair, error: %d\n", ret); + goto err_pdir; + } + + /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ + qp->qp_handle = resp->qpn; + qp->port = init_attr->port_num; + qp->ibqp.qp_num = resp->qpn; + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + return &qp->ibqp; + +err_pdir: + pvrdma_page_dir_cleanup(dev, &qp->pdir); +err_umem: + if (pd->uobject && udata) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } +err_qp: + kfree(qp); + atomic_dec(&dev->num_qps); + + return ERR_PTR(ret); +} + +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long flags, scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + dev->qp_tbl[qp->qp_handle] = NULL; + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + atomic_dec(&qp->refcnt); + wait_event(qp->wait, !atomic_read(&qp->refcnt)); + + pvrdma_page_dir_cleanup(dev, &qp->pdir); + + kfree(qp); + + atomic_dec(&dev->num_qps); +} + +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * + * @return: 0 on success. + */ +int pvrdma_destroy_qp(struct ib_qp *qp) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; + cmd->qp_handle = vqp->qp_handle; + + ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + if (ret < 0) + dev_warn(&to_vdev(qp->device)->pdev->dev, + "destroy queuepair failed, error: %d\n", ret); + + pvrdma_free_qp(vqp); + + return 0; +} + +/** + * pvrdma_modify_qp - modify queue pair attributes + * @ibqp: the queue pair + * @attr: the new queue pair's attributes + * @attr_mask: attributes mask + * @udata: user data + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + struct pvrdma_qp *qp = to_vqp(ibqp); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp; + int cur_state, next_state; + int ret; + + /* Sanity checking. Should need lock here */ + mutex_lock(&qp->mutex); + cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : + qp->state; + next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_ETHERNET)) { + ret = -EINVAL; + goto out; + } + + if (attr_mask & IB_QP_PORT) { + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + if (attr->min_rnr_timer > 31) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= dev->dsr->caps.max_pkeys) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (cur_state == next_state && cur_state == IB_QPS_RESET) { + ret = 0; + goto out; + } + + qp->state = next_state; + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP; + cmd->qp_handle = qp->qp_handle; + cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask); + cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state); + cmd->attrs.cur_qp_state = + ib_qp_state_to_pvrdma(attr->cur_qp_state); + cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu); + cmd->attrs.path_mig_state = + ib_mig_state_to_pvrdma(attr->path_mig_state); + cmd->attrs.qkey = attr->qkey; + cmd->attrs.rq_psn = attr->rq_psn; + cmd->attrs.sq_psn = attr->sq_psn; + cmd->attrs.dest_qp_num = attr->dest_qp_num; + cmd->attrs.qp_access_flags = + ib_access_flags_to_pvrdma(attr->qp_access_flags); + cmd->attrs.pkey_index = attr->pkey_index; + cmd->attrs.alt_pkey_index = attr->alt_pkey_index; + cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify; + cmd->attrs.sq_draining = attr->sq_draining; + cmd->attrs.max_rd_atomic = attr->max_rd_atomic; + cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic; + cmd->attrs.min_rnr_timer = attr->min_rnr_timer; + cmd->attrs.port_num = attr->port_num; + cmd->attrs.timeout = attr->timeout; + cmd->attrs.retry_cnt = attr->retry_cnt; + cmd->attrs.rnr_retry = attr->rnr_retry; + cmd->attrs.alt_port_num = attr->alt_port_num; + cmd->attrs.alt_timeout = attr->alt_timeout; + ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap); + ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr); + ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not modify queuepair, error: %d\n", ret); + } else if (rsp.hdr.err > 0) { + dev_warn(&dev->pdev->dev, + "cannot modify queuepair, error: %d\n", rsp.hdr.err); + ret = -EINVAL; + } + + if (ret == 0 && next_state == IB_QPS_RESET) + pvrdma_reset_qp(qp); + +out: + mutex_unlock(&qp->mutex); + + return ret; +} + +static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n) +{ + return pvrdma_page_dir_get_ptr(&qp->pdir, + qp->sq.offset + n * qp->sq.wqe_size); +} + +static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n) +{ + return pvrdma_page_dir_get_ptr(&qp->pdir, + qp->rq.offset + n * qp->rq.wqe_size); +} + +static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) +{ + struct pvrdma_user_mr *mr = to_vmr(wr->mr); + + wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova; + wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma; + wqe_hdr->wr.fast_reg.page_shift = mr->page_shift; + wqe_hdr->wr.fast_reg.page_list_len = mr->npages; + wqe_hdr->wr.fast_reg.length = mr->ibmr.length; + wqe_hdr->wr.fast_reg.access_flags = wr->access; + wqe_hdr->wr.fast_reg.rkey = wr->key; + + return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages, + mr->npages); +} + +/** + * pvrdma_post_send - post send work request entries on a QP + * @ibqp: the QP + * @wr: work request list to post + * @bad_wr: the first bad WR returned + * + * @return: 0 on success, otherwise errno returned. + */ +int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct pvrdma_qp *qp = to_vqp(ibqp); + struct pvrdma_dev *dev = to_vdev(ibqp->device); + unsigned long flags; + struct pvrdma_sq_wqe_hdr *wqe_hdr; + struct pvrdma_sge *sge; + int i, index; + int nreq; + int ret; + + /* + * In states lower than RTS, we can fail immediately. In other states, + * just post and let the device figure it out. + */ + if (qp->state < IB_QPS_RTS) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->sq.lock, flags); + + index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); + for (nreq = 0; wr; nreq++, wr = wr->next) { + unsigned int tail; + + if (unlikely(!pvrdma_idx_ring_has_space( + qp->sq.ring, qp->sq.wqe_cnt, &tail))) { + dev_warn_ratelimited(&dev->pdev->dev, + "send queue is full\n"); + *bad_wr = wr; + ret = -ENOMEM; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) { + dev_warn_ratelimited(&dev->pdev->dev, + "send SGE overflow\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + if (unlikely(wr->opcode < 0)) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid send opcode\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + /* + * Only support UD, RC. + * Need to check opcode table for thorough checking. + * opcode _UD _UC _RC + * _SEND x x x + * _SEND_WITH_IMM x x x + * _RDMA_WRITE x x + * _RDMA_WRITE_WITH_IMM x x + * _LOCAL_INV x x + * _SEND_WITH_INV x x + * _RDMA_READ x + * _ATOMIC_CMP_AND_SWP x + * _ATOMIC_FETCH_AND_ADD x + * _MASK_ATOMIC_CMP_AND_SWP x + * _MASK_ATOMIC_FETCH_AND_ADD x + * _REG_MR x + * + */ + if (qp->ibqp.qp_type != IB_QPT_UD && + qp->ibqp.qp_type != IB_QPT_RC && + wr->opcode != IB_WR_SEND) { + dev_warn_ratelimited(&dev->pdev->dev, + "unsupported queuepair type\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } else if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_GSI) { + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid send opcode\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + } + + wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index); + memset(wqe_hdr, 0, sizeof(*wqe_hdr)); + wqe_hdr->wr_id = wr->wr_id; + wqe_hdr->num_sge = wr->num_sge; + wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode); + wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags); + if (wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe_hdr->ex.imm_data = wr->ex.imm_data; + + switch (qp->ibqp.qp_type) { + case IB_QPT_GSI: + case IB_QPT_UD: + if (unlikely(!ud_wr(wr)->ah)) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid address handle\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + /* + * Use qkey from qp context if high order bit set, + * otherwise from work request. + */ + wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn; + wqe_hdr->wr.ud.remote_qkey = + ud_wr(wr)->remote_qkey & 0x80000000 ? + qp->qkey : ud_wr(wr)->remote_qkey; + wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av; + + break; + case IB_QPT_RC: + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_hdr->wr.rdma.remote_addr = + rdma_wr(wr)->remote_addr; + wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey; + break; + case IB_WR_LOCAL_INV: + case IB_WR_SEND_WITH_INV: + wqe_hdr->ex.invalidate_rkey = + wr->ex.invalidate_rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + wqe_hdr->wr.atomic.remote_addr = + atomic_wr(wr)->remote_addr; + wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey; + wqe_hdr->wr.atomic.compare_add = + atomic_wr(wr)->compare_add; + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) + wqe_hdr->wr.atomic.swap = + atomic_wr(wr)->swap; + break; + case IB_WR_REG_MR: + ret = set_reg_seg(wqe_hdr, reg_wr(wr)); + if (ret < 0) { + dev_warn_ratelimited(&dev->pdev->dev, + "Failed to set fast register work request\n"); + *bad_wr = wr; + goto out; + } + break; + default: + break; + } + + break; + default: + dev_warn_ratelimited(&dev->pdev->dev, + "invalid queuepair type\n"); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + sge = (struct pvrdma_sge *)(wqe_hdr + 1); + for (i = 0; i < wr->num_sge; i++) { + /* Need to check wqe_size 0 or max size */ + sge->addr = wr->sg_list[i].addr; + sge->length = wr->sg_list[i].length; + sge->lkey = wr->sg_list[i].lkey; + sge++; + } + + /* Make sure wqe is written before index update */ + smp_wmb(); + + index++; + if (unlikely(index >= qp->sq.wqe_cnt)) + index = 0; + /* Update shared sq ring */ + pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail, + qp->sq.wqe_cnt); + } + + ret = 0; + +out: + spin_unlock_irqrestore(&qp->sq.lock, flags); + + if (!ret) + pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle); + + return ret; +} + +/** + * pvrdma_post_receive - post receive work request entries on a QP + * @ibqp: the QP + * @wr: the work request list to post + * @bad_wr: the first bad WR returned + * + * @return: 0 on success, otherwise errno returned. + */ +int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + unsigned long flags; + struct pvrdma_qp *qp = to_vqp(ibqp); + struct pvrdma_rq_wqe_hdr *wqe_hdr; + struct pvrdma_sge *sge; + int index, nreq; + int ret = 0; + int i; + + /* + * In the RESET state, we can fail immediately. For other states, + * just post and let the device figure it out. + */ + if (qp->state == IB_QPS_RESET) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->rq.lock, flags); + + index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); + for (nreq = 0; wr; nreq++, wr = wr->next) { + unsigned int tail; + + if (unlikely(wr->num_sge > qp->rq.max_sg || + wr->num_sge < 0)) { + ret = -EINVAL; + *bad_wr = wr; + dev_warn_ratelimited(&dev->pdev->dev, + "recv SGE overflow\n"); + goto out; + } + + if (unlikely(!pvrdma_idx_ring_has_space( + qp->rq.ring, qp->rq.wqe_cnt, &tail))) { + ret = -ENOMEM; + *bad_wr = wr; + dev_warn_ratelimited(&dev->pdev->dev, + "recv queue full\n"); + goto out; + } + + wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index); + wqe_hdr->wr_id = wr->wr_id; + wqe_hdr->num_sge = wr->num_sge; + wqe_hdr->total_len = 0; + + sge = (struct pvrdma_sge *)(wqe_hdr + 1); + for (i = 0; i < wr->num_sge; i++) { + sge->addr = wr->sg_list[i].addr; + sge->length = wr->sg_list[i].length; + sge->lkey = wr->sg_list[i].lkey; + sge++; + } + + /* Make sure wqe is written before index update */ + smp_wmb(); + + index++; + if (unlikely(index >= qp->rq.wqe_cnt)) + index = 0; + /* Update shared rq ring */ + pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail, + qp->rq.wqe_cnt); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle); + + return ret; + +out: + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return ret; +} + +/** + * pvrdma_query_qp - query a queue pair's attributes + * @ibqp: the queue pair to query + * @attr: the queue pair's attributes + * @attr_mask: attributes mask + * @init_attr: initial queue pair attributes + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + struct pvrdma_qp *qp = to_vqp(ibqp); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_qp *cmd = &req.query_qp; + struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp; + int ret = 0; + + mutex_lock(&qp->mutex); + + if (qp->state == IB_QPS_RESET) { + attr->qp_state = IB_QPS_RESET; + goto out; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP; + cmd->qp_handle = qp->qp_handle; + cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not query queuepair, error: %d\n", ret); + goto out; + } + + attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state); + attr->cur_qp_state = + pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state); + attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu); + attr->path_mig_state = + pvrdma_mig_state_to_ib(resp->attrs.path_mig_state); + attr->qkey = resp->attrs.qkey; + attr->rq_psn = resp->attrs.rq_psn; + attr->sq_psn = resp->attrs.sq_psn; + attr->dest_qp_num = resp->attrs.dest_qp_num; + attr->qp_access_flags = + pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags); + attr->pkey_index = resp->attrs.pkey_index; + attr->alt_pkey_index = resp->attrs.alt_pkey_index; + attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify; + attr->sq_draining = resp->attrs.sq_draining; + attr->max_rd_atomic = resp->attrs.max_rd_atomic; + attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic; + attr->min_rnr_timer = resp->attrs.min_rnr_timer; + attr->port_num = resp->attrs.port_num; + attr->timeout = resp->attrs.timeout; + attr->retry_cnt = resp->attrs.retry_cnt; + attr->rnr_retry = resp->attrs.rnr_retry; + attr->alt_port_num = resp->attrs.alt_port_num; + attr->alt_timeout = resp->attrs.alt_timeout; + pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap); + pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr); + pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr); + + qp->state = attr->qp_state; + + ret = 0; + +out: + attr->cur_qp_state = attr->qp_state; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->xrcd = NULL; + init_attr->cap = attr->cap; + init_attr->sq_sig_type = 0; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->create_flags = 0; + init_attr->port_num = qp->port; + + mutex_unlock(&qp->mutex); + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h new file mode 100644 index 000000000000..ed9022a91a1d --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_RING_H__ +#define __PVRDMA_RING_H__ + +#include <linux/types.h> + +#define PVRDMA_INVALID_IDX -1 /* Invalid index. */ + +struct pvrdma_ring { + atomic_t prod_tail; /* Producer tail. */ + atomic_t cons_head; /* Consumer head. */ +}; + +struct pvrdma_ring_state { + struct pvrdma_ring tx; /* Tx ring. */ + struct pvrdma_ring rx; /* Rx ring. */ +}; + +static inline int pvrdma_idx_valid(__u32 idx, __u32 max_elems) +{ + /* Generates fewer instructions than a less-than. */ + return (idx & ~((max_elems << 1) - 1)) == 0; +} + +static inline __s32 pvrdma_idx(atomic_t *var, __u32 max_elems) +{ + const unsigned int idx = atomic_read(var); + + if (pvrdma_idx_valid(idx, max_elems)) + return idx & (max_elems - 1); + return PVRDMA_INVALID_IDX; +} + +static inline void pvrdma_idx_ring_inc(atomic_t *var, __u32 max_elems) +{ + __u32 idx = atomic_read(var) + 1; /* Increment. */ + + idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */ + atomic_set(var, idx); +} + +static inline __s32 pvrdma_idx_ring_has_space(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *out_tail) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_tail = tail & (max_elems - 1); + return tail != (head ^ max_elems); + } + return PVRDMA_INVALID_IDX; +} + +static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *out_head) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_head = head & (max_elems - 1); + return tail != head; + } + return PVRDMA_INVALID_IDX; +} + +static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *idx) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems) && + pvrdma_idx_valid(*idx, max_elems)) { + if (tail > head && (*idx < tail && *idx >= head)) + return true; + else if (head > tail && (*idx >= head || *idx < tail)) + return true; + } + return false; +} + +#endif /* __PVRDMA_RING_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c new file mode 100644 index 000000000000..54891370d18a --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/inet.h> +#include <linux/io.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/vmw_pvrdma-abi.h> + +#include "pvrdma.h" + +/** + * pvrdma_query_device - query device + * @ibdev: the device to query + * @props: the device properties + * @uhw: user data + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *uhw) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + memset(props, 0, sizeof(*props)); + + props->fw_ver = dev->dsr->caps.fw_ver; + props->sys_image_guid = dev->dsr->caps.sys_image_guid; + props->max_mr_size = dev->dsr->caps.max_mr_size; + props->page_size_cap = dev->dsr->caps.page_size_cap; + props->vendor_id = dev->dsr->caps.vendor_id; + props->vendor_part_id = dev->pdev->device; + props->hw_ver = dev->dsr->caps.hw_ver; + props->max_qp = dev->dsr->caps.max_qp; + props->max_qp_wr = dev->dsr->caps.max_qp_wr; + props->device_cap_flags = dev->dsr->caps.device_cap_flags; + props->max_sge = dev->dsr->caps.max_sge; + props->max_cq = dev->dsr->caps.max_cq; + props->max_cqe = dev->dsr->caps.max_cqe; + props->max_mr = dev->dsr->caps.max_mr; + props->max_pd = dev->dsr->caps.max_pd; + props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom; + props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom; + props->atomic_cap = + dev->dsr->caps.atomic_ops & + (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; + props->max_ah = dev->dsr->caps.max_ah; + props->max_pkeys = dev->dsr->caps.max_pkeys; + props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay; + if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) && + (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) && + (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) { + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + } + + return 0; +} + +/** + * pvrdma_query_port - query device port attributes + * @ibdev: the device to query + * @port: the port number + * @props: the device properties + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_port *cmd = &req.query_port; + struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp; + int err; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT; + cmd->port_num = port; + + err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP); + if (err < 0) { + dev_warn(&dev->pdev->dev, + "could not query port, error: %d\n", err); + return err; + } + + memset(props, 0, sizeof(*props)); + + props->state = pvrdma_port_state_to_ib(resp->attrs.state); + props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu); + props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu); + props->gid_tbl_len = resp->attrs.gid_tbl_len; + props->port_cap_flags = + pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); + props->max_msg_sz = resp->attrs.max_msg_sz; + props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; + props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; + props->pkey_tbl_len = resp->attrs.pkey_tbl_len; + props->lid = resp->attrs.lid; + props->sm_lid = resp->attrs.sm_lid; + props->lmc = resp->attrs.lmc; + props->max_vl_num = resp->attrs.max_vl_num; + props->sm_sl = resp->attrs.sm_sl; + props->subnet_timeout = resp->attrs.subnet_timeout; + props->init_type_reply = resp->attrs.init_type_reply; + props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width); + props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed); + props->phys_state = resp->attrs.phys_state; + + return 0; +} + +/** + * pvrdma_query_gid - query device gid + * @ibdev: the device to query + * @port: the port number + * @index: the index + * @gid: the device gid value + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (index >= dev->dsr->caps.gid_tbl_len) + return -EINVAL; + + memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid)); + + return 0; +} + +/** + * pvrdma_query_pkey - query device port's P_Key table + * @ibdev: the device to query + * @port: the port number + * @index: the index + * @pkey: the device P_Key value + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + int err = 0; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY; + cmd->port_num = port; + cmd->index = index; + + err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp, + PVRDMA_CMD_QUERY_PKEY_RESP); + if (err < 0) { + dev_warn(&to_vdev(ibdev)->pdev->dev, + "could not query pkey, error: %d\n", err); + return err; + } + + *pkey = rsp.query_pkey_resp.pkey; + + return 0; +} + +enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, + u8 port) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int pvrdma_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + unsigned long flags; + + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) { + dev_warn(&to_vdev(ibdev)->pdev->dev, + "unsupported device modify mask %#x\n", mask); + return -EOPNOTSUPP; + } + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags); + } + + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { + mutex_lock(&to_vdev(ibdev)->port_mutex); + to_vdev(ibdev)->sys_image_guid = + cpu_to_be64(props->sys_image_guid); + mutex_unlock(&to_vdev(ibdev)->port_mutex); + } + + return 0; +} + +/** + * pvrdma_modify_port - modify device port attributes + * @ibdev: the device to modify + * @port: the port number + * @mask: attributes to modify + * @props: the device properties + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct ib_port_attr attr; + struct pvrdma_dev *vdev = to_vdev(ibdev); + int ret; + + if (mask & ~IB_PORT_SHUTDOWN) { + dev_warn(&vdev->pdev->dev, + "unsupported port modify mask %#x\n", mask); + return -EOPNOTSUPP; + } + + mutex_lock(&vdev->port_mutex); + ret = pvrdma_query_port(ibdev, port, &attr); + if (ret) + goto out; + + vdev->port_cap_mask |= props->set_port_cap_mask; + vdev->port_cap_mask &= ~props->clr_port_cap_mask; + + if (mask & IB_PORT_SHUTDOWN) + vdev->ib_active = false; + +out: + mutex_unlock(&vdev->port_mutex); + return ret; +} + +/** + * pvrdma_alloc_ucontext - allocate ucontext + * @ibdev: the IB device + * @udata: user data + * + * @return: the ib_ucontext pointer on success, otherwise errno. + */ +struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct pvrdma_dev *vdev = to_vdev(ibdev); + struct pvrdma_ucontext *context; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_uc *cmd = &req.create_uc; + struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp; + struct pvrdma_alloc_ucontext_resp uresp; + int ret; + void *ptr; + + if (!vdev->ib_active) + return ERR_PTR(-EAGAIN); + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + context->dev = vdev; + ret = pvrdma_uar_alloc(vdev, &context->uar); + if (ret) { + kfree(context); + return ERR_PTR(-ENOMEM); + } + + /* get ctx_handle from host */ + memset(cmd, 0, sizeof(*cmd)); + cmd->pfn = context->uar.pfn; + cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC; + ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP); + if (ret < 0) { + dev_warn(&vdev->pdev->dev, + "could not create ucontext, error: %d\n", ret); + ptr = ERR_PTR(ret); + goto err; + } + + context->ctx_handle = resp->ctx_handle; + + /* copy back to user */ + uresp.qp_tab_size = vdev->dsr->caps.max_qp; + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) { + pvrdma_uar_free(vdev, &context->uar); + context->ibucontext.device = ibdev; + pvrdma_dealloc_ucontext(&context->ibucontext); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; + +err: + pvrdma_uar_free(vdev, &context->uar); + kfree(context); + return ptr; +} + +/** + * pvrdma_dealloc_ucontext - deallocate ucontext + * @ibcontext: the ucontext + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct pvrdma_ucontext *context = to_vucontext(ibcontext); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC; + cmd->ctx_handle = context->ctx_handle; + + ret = pvrdma_cmd_post(context->dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&context->dev->pdev->dev, + "destroy ucontext failed, error: %d\n", ret); + + /* Free the UAR even if the device command failed */ + pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar); + kfree(context); + + return ret; +} + +/** + * pvrdma_mmap - create mmap region + * @ibcontext: the user context + * @vma: the VMA + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct pvrdma_ucontext *context = to_vucontext(ibcontext); + unsigned long start = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + + dev_dbg(&context->dev->pdev->dev, "create mmap region\n"); + + if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) { + dev_warn(&context->dev->pdev->dev, + "invalid params for mmap region\n"); + return -EINVAL; + } + + /* Map UAR to kernel space, VM_LOCKED? */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, start, context->uar.pfn, size, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * pvrdma_alloc_pd - allocate protection domain + * @ibdev: the IB device + * @context: user context + * @udata: user data + * + * @return: the ib_pd protection domain pointer on success, otherwise errno. + */ +struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct pvrdma_pd *pd; + struct pvrdma_dev *dev = to_vdev(ibdev); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_pd *cmd = &req.create_pd; + struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; + int ret; + void *ptr; + + /* Check allowed max pds */ + if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) + return ERR_PTR(-ENOMEM); + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ptr = ERR_PTR(-ENOMEM); + goto err; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; + cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "failed to allocate protection domain, error: %d\n", + ret); + ptr = ERR_PTR(ret); + goto freepd; + } + + pd->privileged = !context; + pd->pd_handle = resp->pd_handle; + pd->pdn = resp->pd_handle; + + if (context) { + if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, + "failed to copy back protection domain\n"); + pvrdma_dealloc_pd(&pd->ibpd); + return ERR_PTR(-EFAULT); + } + } + + /* u32 pd handle */ + return &pd->ibpd; + +freepd: + kfree(pd); +err: + atomic_dec(&dev->num_pds); + return ptr; +} + +/** + * pvrdma_dealloc_pd - deallocate protection domain + * @pd: the protection domain to be released + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_dealloc_pd(struct ib_pd *pd) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD; + cmd->pd_handle = to_vpd(pd)->pd_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret) + dev_warn(&dev->pdev->dev, + "could not dealloc protection domain, error: %d\n", + ret); + + kfree(to_vpd(pd)); + atomic_dec(&dev->num_pds); + + return 0; +} + +/** + * pvrdma_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * @udata: user data blob + * + * @return: the ib_ah pointer on success, otherwise errno. + */ +struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_ah *ah; + enum rdma_link_layer ll; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return ERR_PTR(-EINVAL); + + ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num); + + if (ll != IB_LINK_LAYER_ETHERNET || + rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) + return ERR_PTR(-ENOMEM); + + ah = kzalloc(sizeof(*ah), GFP_KERNEL); + if (!ah) { + atomic_dec(&dev->num_ahs); + return ERR_PTR(-ENOMEM); + } + + ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24); + ah->av.src_path_bits = ah_attr->src_path_bits; + ah->av.src_path_bits |= 0x80; + ah->av.gid_index = ah_attr->grh.sgid_index; + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label; + memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.dmac, ah_attr->dmac, 6); + + ah->ibah.device = pd->device; + ah->ibah.pd = pd; + ah->ibah.uobject = NULL; + + return &ah->ibah; +} + +/** + * pvrdma_destroy_ah - destroy an address handle + * @ah: the address handle to destroyed + * + * @return: 0 on success. + */ +int pvrdma_destroy_ah(struct ib_ah *ah) +{ + struct pvrdma_dev *dev = to_vdev(ah->device); + + kfree(to_vah(ah)); + atomic_dec(&dev->num_ahs); + + return 0; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h new file mode 100644 index 000000000000..bfbe96b56255 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_VERBS_H__ +#define __PVRDMA_VERBS_H__ + +#include <linux/types.h> + +union pvrdma_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum pvrdma_link_layer { + PVRDMA_LINK_LAYER_UNSPECIFIED, + PVRDMA_LINK_LAYER_INFINIBAND, + PVRDMA_LINK_LAYER_ETHERNET, +}; + +enum pvrdma_mtu { + PVRDMA_MTU_256 = 1, + PVRDMA_MTU_512 = 2, + PVRDMA_MTU_1024 = 3, + PVRDMA_MTU_2048 = 4, + PVRDMA_MTU_4096 = 5, +}; + +static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) +{ + switch (mtu) { + case PVRDMA_MTU_256: return 256; + case PVRDMA_MTU_512: return 512; + case PVRDMA_MTU_1024: return 1024; + case PVRDMA_MTU_2048: return 2048; + case PVRDMA_MTU_4096: return 4096; + default: return -1; + } +} + +static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) +{ + switch (mtu) { + case 256: return PVRDMA_MTU_256; + case 512: return PVRDMA_MTU_512; + case 1024: return PVRDMA_MTU_1024; + case 2048: return PVRDMA_MTU_2048; + case 4096: + default: return PVRDMA_MTU_4096; + } +} + +enum pvrdma_port_state { + PVRDMA_PORT_NOP = 0, + PVRDMA_PORT_DOWN = 1, + PVRDMA_PORT_INIT = 2, + PVRDMA_PORT_ARMED = 3, + PVRDMA_PORT_ACTIVE = 4, + PVRDMA_PORT_ACTIVE_DEFER = 5, +}; + +enum pvrdma_port_cap_flags { + PVRDMA_PORT_SM = 1 << 1, + PVRDMA_PORT_NOTICE_SUP = 1 << 2, + PVRDMA_PORT_TRAP_SUP = 1 << 3, + PVRDMA_PORT_OPT_IPD_SUP = 1 << 4, + PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5, + PVRDMA_PORT_SL_MAP_SUP = 1 << 6, + PVRDMA_PORT_MKEY_NVRAM = 1 << 7, + PVRDMA_PORT_PKEY_NVRAM = 1 << 8, + PVRDMA_PORT_LED_INFO_SUP = 1 << 9, + PVRDMA_PORT_SM_DISABLED = 1 << 10, + PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + PVRDMA_PORT_CM_SUP = 1 << 16, + PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17, + PVRDMA_PORT_REINIT_SUP = 1 << 18, + PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19, + PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20, + PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21, + PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23, + PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24, + PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25, + PVRDMA_PORT_IP_BASED_GIDS = 1 << 26, + PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS, +}; + +enum pvrdma_port_width { + PVRDMA_WIDTH_1X = 1, + PVRDMA_WIDTH_4X = 2, + PVRDMA_WIDTH_8X = 4, + PVRDMA_WIDTH_12X = 8, +}; + +static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) +{ + switch (width) { + case PVRDMA_WIDTH_1X: return 1; + case PVRDMA_WIDTH_4X: return 4; + case PVRDMA_WIDTH_8X: return 8; + case PVRDMA_WIDTH_12X: return 12; + default: return -1; + } +} + +enum pvrdma_port_speed { + PVRDMA_SPEED_SDR = 1, + PVRDMA_SPEED_DDR = 2, + PVRDMA_SPEED_QDR = 4, + PVRDMA_SPEED_FDR10 = 8, + PVRDMA_SPEED_FDR = 16, + PVRDMA_SPEED_EDR = 32, +}; + +struct pvrdma_port_attr { + enum pvrdma_port_state state; + enum pvrdma_mtu max_mtu; + enum pvrdma_mtu active_mtu; + u32 gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; + u8 reserved[2]; +}; + +struct pvrdma_global_route { + union pvrdma_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; + u8 reserved; +}; + +struct pvrdma_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union pvrdma_gid sgid; + union pvrdma_gid dgid; +}; + +enum pvrdma_ah_flags { + PVRDMA_AH_GRH = 1, +}; + +enum pvrdma_rate { + PVRDMA_RATE_PORT_CURRENT = 0, + PVRDMA_RATE_2_5_GBPS = 2, + PVRDMA_RATE_5_GBPS = 5, + PVRDMA_RATE_10_GBPS = 3, + PVRDMA_RATE_20_GBPS = 6, + PVRDMA_RATE_30_GBPS = 4, + PVRDMA_RATE_40_GBPS = 7, + PVRDMA_RATE_60_GBPS = 8, + PVRDMA_RATE_80_GBPS = 9, + PVRDMA_RATE_120_GBPS = 10, + PVRDMA_RATE_14_GBPS = 11, + PVRDMA_RATE_56_GBPS = 12, + PVRDMA_RATE_112_GBPS = 13, + PVRDMA_RATE_168_GBPS = 14, + PVRDMA_RATE_25_GBPS = 15, + PVRDMA_RATE_100_GBPS = 16, + PVRDMA_RATE_200_GBPS = 17, + PVRDMA_RATE_300_GBPS = 18, +}; + +struct pvrdma_ah_attr { + struct pvrdma_global_route grh; + u16 dlid; + u16 vlan_id; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; + u8 dmac[6]; + u8 reserved; +}; + +enum pvrdma_cq_notify_flags { + PVRDMA_CQ_SOLICITED = 1 << 0, + PVRDMA_CQ_NEXT_COMP = 1 << 1, + PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED | + PVRDMA_CQ_NEXT_COMP, + PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2, +}; + +struct pvrdma_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; + u32 reserved; +}; + +enum pvrdma_sig_type { + PVRDMA_SIGNAL_ALL_WR, + PVRDMA_SIGNAL_REQ_WR, +}; + +enum pvrdma_qp_type { + PVRDMA_QPT_SMI, + PVRDMA_QPT_GSI, + PVRDMA_QPT_RC, + PVRDMA_QPT_UC, + PVRDMA_QPT_UD, + PVRDMA_QPT_RAW_IPV6, + PVRDMA_QPT_RAW_ETHERTYPE, + PVRDMA_QPT_RAW_PACKET = 8, + PVRDMA_QPT_XRC_INI = 9, + PVRDMA_QPT_XRC_TGT, + PVRDMA_QPT_MAX, +}; + +enum pvrdma_qp_create_flags { + PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0, + PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, +}; + +enum pvrdma_qp_attr_mask { + PVRDMA_QP_STATE = 1 << 0, + PVRDMA_QP_CUR_STATE = 1 << 1, + PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, + PVRDMA_QP_ACCESS_FLAGS = 1 << 3, + PVRDMA_QP_PKEY_INDEX = 1 << 4, + PVRDMA_QP_PORT = 1 << 5, + PVRDMA_QP_QKEY = 1 << 6, + PVRDMA_QP_AV = 1 << 7, + PVRDMA_QP_PATH_MTU = 1 << 8, + PVRDMA_QP_TIMEOUT = 1 << 9, + PVRDMA_QP_RETRY_CNT = 1 << 10, + PVRDMA_QP_RNR_RETRY = 1 << 11, + PVRDMA_QP_RQ_PSN = 1 << 12, + PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13, + PVRDMA_QP_ALT_PATH = 1 << 14, + PVRDMA_QP_MIN_RNR_TIMER = 1 << 15, + PVRDMA_QP_SQ_PSN = 1 << 16, + PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17, + PVRDMA_QP_PATH_MIG_STATE = 1 << 18, + PVRDMA_QP_CAP = 1 << 19, + PVRDMA_QP_DEST_QPN = 1 << 20, + PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN, +}; + +enum pvrdma_qp_state { + PVRDMA_QPS_RESET, + PVRDMA_QPS_INIT, + PVRDMA_QPS_RTR, + PVRDMA_QPS_RTS, + PVRDMA_QPS_SQD, + PVRDMA_QPS_SQE, + PVRDMA_QPS_ERR, +}; + +enum pvrdma_mig_state { + PVRDMA_MIG_MIGRATED, + PVRDMA_MIG_REARM, + PVRDMA_MIG_ARMED, +}; + +enum pvrdma_mw_type { + PVRDMA_MW_TYPE_1 = 1, + PVRDMA_MW_TYPE_2 = 2, +}; + +struct pvrdma_qp_attr { + enum pvrdma_qp_state qp_state; + enum pvrdma_qp_state cur_qp_state; + enum pvrdma_mtu path_mtu; + enum pvrdma_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + u32 qp_access_flags; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; + u8 reserved[5]; + struct pvrdma_qp_cap cap; + struct pvrdma_ah_attr ah_attr; + struct pvrdma_ah_attr alt_ah_attr; +}; + +enum pvrdma_send_flags { + PVRDMA_SEND_FENCE = 1 << 0, + PVRDMA_SEND_SIGNALED = 1 << 1, + PVRDMA_SEND_SOLICITED = 1 << 2, + PVRDMA_SEND_INLINE = 1 << 3, + PVRDMA_SEND_IP_CSUM = 1 << 4, + PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM, +}; + +enum pvrdma_access_flags { + PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0, + PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1, + PVRDMA_ACCESS_REMOTE_READ = 1 << 2, + PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3, + PVRDMA_ACCESS_MW_BIND = 1 << 4, + PVRDMA_ZERO_BASED = 1 << 5, + PVRDMA_ACCESS_ON_DEMAND = 1 << 6, + PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND, +}; + +int pvrdma_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata); +int pvrdma_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int pvrdma_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid); +int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, + u16 index, u16 *pkey); +enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, + u8 port); +int pvrdma_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props); +int pvrdma_modify_port(struct ib_device *ibdev, u8 port, + int mask, struct ib_port_modify *props); +int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); +int pvrdma_dealloc_ucontext(struct ib_ucontext *context); +struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int pvrdma_dealloc_pd(struct ib_pd *ibpd); +struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int pvrdma_dereg_mr(struct ib_mr *mr); +struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); +int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); +int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, + struct ib_udata *udata); +struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, + struct ib_udata *udata); +int pvrdma_destroy_cq(struct ib_cq *cq); +int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah); +struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); +int pvrdma_destroy_qp(struct ib_qp *qp); +int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +#endif /* __PVRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..4d0b6992e847 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - smp_read_barrier_depends(); /* see rvt_cq_exit */ - worker = cq->rdi->worker; - if (likely(worker)) { + spin_lock(&cq->rdi->n_cqs_lock); + if (likely(cq->rdi->worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - kthread_queue_work(worker, &cq->comptask); + kthread_queue_work(cq->rdi->worker, &cq->comptask); } + spin_unlock(&cq->rdi->n_cqs_lock); } spin_unlock_irqrestore(&cq->lock, flags); @@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, } } - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); @@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_dev_info *rdi = cq->rdi; kthread_flush_work(&cq->comptask); - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else @@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { - int ret = 0; int cpu; - struct task_struct *task; + struct kthread_worker *worker; if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); - rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); - if (!rdi->worker) - return -ENOMEM; - kthread_init_worker(rdi->worker); - task = kthread_create_on_node( - kthread_worker_fn, - rdi->worker, - rdi->dparms.node, - "%s", rdi->dparms.cq_name); - if (IS_ERR(task)) { - kfree(rdi->worker); - rdi->worker = NULL; - return PTR_ERR(task); - } - set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - kthread_bind(task, cpu); - wake_up_process(task); - return ret; + worker = kthread_create_worker_on_cpu(cpu, 0, + "%s", rdi->dparms.cq_name); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + set_user_nice(worker->task, MIN_NICE); + rdi->worker = worker; + return 0; } /** @@ -541,13 +530,14 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; - worker = rdi->worker; - if (!worker) + /* block future queuing from send_complete() */ + spin_lock_irq(&rdi->n_cqs_lock); + if (!rdi->worker) { + spin_unlock_irq(&rdi->n_cqs_lock); return; - /* blocks future queuing from send_complete() */ + } rdi->worker = NULL; - smp_wmb(); /* See rdi_cq_enter */ - kthread_flush_worker(worker); - kthread_stop(worker->task); - kfree(worker); + spin_unlock_irq(&rdi->n_cqs_lock); + + kthread_destroy_worker(worker); } diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 983d319ac976..05c8c2afb0e3 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) goto bail; mqp->qp = qp; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); bail: return mqp; @@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); kfree(mqp); } diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 46b64970058e..52fd15276ee6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -51,6 +51,7 @@ #include <rdma/rdma_vt.h> #include "vt.h" #include "mr.h" +#include "trace.h" /** * rvt_driver_mr_init - Init MR resources per driver @@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) lkey_table_size = rdi->dparms.lkey_table_size; } rdi->lkey_table.max = 1 << lkey_table_size; + rdi->lkey_table.shift = 32 - lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); @@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size); n++; if (n == RVT_SEGSZ) { m++; @@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; return 0; @@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; fmr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); if (++n == RVT_SEGSZ) { m++; n = 0; @@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_mregion *mr; unsigned n, m; size_t off; - struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); /* * We use LKEY == zero for kernel virtual addresses @@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, */ rcu_read_lock(); if (sge->lkey == 0) { + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + if (pd->user) goto bail; mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); isge->mr = mr; @@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; @@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; @@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference(rdi->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); sge->mr = mr; @@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, goto ok; } - mr = rcu_dereference( - rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; @@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6500c3b5a89c..2a13ac660f2b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_rvt_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; +EXPORT_SYMBOL(ib_rvt_wc_opcode); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, return ret; bail_ip: - kref_put(&qp->ip->ref, rvt_release_mmap_info); + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 6c0457db5499..e2d23acb6a7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -45,143 +45,10 @@ * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR rdmavt - -#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __RDMAVT_TRACE_H - -#include <linux/tracepoint.h> -#include <linux/trace_seq.h> - -#include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> - #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) #define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rdmavt - -TRACE_EVENT(rvt_dbg, - TP_PROTO(struct rvt_dev_info *rdi, - const char *msg), - TP_ARGS(rdi, msg), - TP_STRUCT__entry( - RDI_DEV_ENTRY(rdi) - __string(msg, msg) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); - ), - TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_qphash -DECLARE_EVENT_CLASS(rvt_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_tx - -#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } -#define show_wr_opcode(opcode) \ -__print_symbolic(opcode, \ - wr_opcode_name(RDMA_WRITE), \ - wr_opcode_name(RDMA_WRITE_WITH_IMM), \ - wr_opcode_name(SEND), \ - wr_opcode_name(SEND_WITH_IMM), \ - wr_opcode_name(RDMA_READ), \ - wr_opcode_name(ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ - wr_opcode_name(LSO), \ - wr_opcode_name(SEND_WITH_INV), \ - wr_opcode_name(RDMA_READ_WITH_INV), \ - wr_opcode_name(LOCAL_INV), \ - wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) - -#define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" - -TRACE_EVENT( - rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u64, wr_id) - __field(u32, qpn) - __field(u32, psn) - __field(u32, lpsn) - __field(u32, length) - __field(u32, opcode) - __field(u32, size) - __field(u32, avail) - __field(u32, head) - __field(u32, last) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->wr_id = wqe->wr.wr_id; - __entry->qpn = qp->ibqp.qp_num; - __entry->psn = wqe->psn; - __entry->lpsn = wqe->lpsn; - __entry->length = wqe->length; - __entry->opcode = wqe->wr.opcode; - __entry->size = qp->s_size; - __entry->avail = qp->s_avail; - __entry->head = qp->s_head; - __entry->last = qp->s_last; - ), - TP_printk( - POS_PRN, - __get_str(dev), - __entry->wr_id, - __entry->qpn, - __entry->psn, - __entry->lpsn, - __entry->length, - __entry->opcode, show_wr_opcode(__entry->opcode), - __entry->size, - __entry->avail, - __entry->head, - __entry->last - ) -); - -#endif /* __RDMAVT_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> +#include "trace_rvt.h" +#include "trace_qp.h" +#include "trace_tx.h" +#include "trace_mr.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h new file mode 100644 index 000000000000..3318a6c36373 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -0,0 +1,112 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_MR_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_mr.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_mr +DECLARE_EVENT_CLASS( + rvt_mr_template, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) + __field(void *, vaddr) + __field(struct page *, page) + __field(size_t, len) + __field(u32, lkey) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); + __entry->vaddr = v; + __entry->page = virt_to_page(v); + __entry->m = m; + __entry->n = n; + __entry->len = len; + ), + TP_printk( + "[%s] vaddr %p page %p m %u n %u len %ld", + __get_str(dev), + __entry->vaddr, + __entry->page, + __entry->m, + __entry->n, + __entry->len + ) +); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_page_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_fmr_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_user_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +#endif /* __RVT_TRACE_MR_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mr +#include <trace/define_trace.h> diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h new file mode 100644 index 000000000000..4c77a3119bda --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_QP_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qp + +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + + +#endif /* __RVT_TRACE_QP_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_qp +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h new file mode 100644 index 000000000000..746f33461d9a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -0,0 +1,81 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RVT_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RVT_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rvt +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h new file mode 100644 index 000000000000..0e03173662d8 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -0,0 +1,132 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_TX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + +#endif /* __RVT_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 6c5e29db88e3..cd27cbde7652 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -420,11 +420,12 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) (wqe->wr.send_flags & IB_SEND_SIGNALED) || (qp->req.state == QP_STATE_ERROR)) { make_send_cqe(qp, wqe, &cqe); + advance_consumer(qp->sq.queue); rxe_cq_post(qp->scq, &cqe, 0); + } else { + advance_consumer(qp->sq.queue); } - advance_consumer(qp->sq.queue); - /* * we completed something so let req run again * if it is trying to fence @@ -510,6 +511,8 @@ int rxe_completer(void *arg) struct rxe_pkt_info *pkt = NULL; enum comp_state state; + rxe_add_ref(qp); + if (!qp->valid) { while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); @@ -739,11 +742,13 @@ exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ + rxe_drop_ref(qp); return -EAGAIN; done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ + rxe_drop_ref(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 73849a5a91b3..efe4c6a35442 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -266,8 +266,6 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - atomic_inc(&qp->skb_out); - if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 1869152f1d23..d0faca294006 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -355,6 +355,9 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, size_t offset; u32 crc = crcp ? (*crcp) : 0; + if (length == 0) + return 0; + if (mem->type == RXE_MEM_TYPE_DMA) { u8 *src, *dest; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ffff5a54cb34..16967cdb45df 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -46,7 +46,7 @@ #include "rxe_loc.h" static LIST_HEAD(rxe_dev_list); -static spinlock_t dev_list_lock; /* spinlock for device list */ +static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */ struct rxe_dev *net_to_rxe(struct net_device *ndev) { @@ -455,6 +455,8 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EAGAIN; } + if (pkt->qp) + atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); return 0; @@ -659,8 +661,6 @@ struct notifier_block rxe_net_notifier = { int rxe_net_ipv4_init(void) { - spin_lock_init(&dev_list_lock); - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { @@ -676,8 +676,6 @@ int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6bac0717c540..d723947a8542 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -180,7 +180,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) size = BITS_TO_LONGS(max - min + 1) * sizeof(long); pool->table = kmalloc(size, GFP_KERNEL); if (!pool->table) { - pr_warn("no memory for bit table\n"); err = -ENOMEM; goto out; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46f062842a9a..252b4d637d45 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -391,16 +391,15 @@ int rxe_rcv(struct sk_buff *skb) payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { - char saddr[sizeof(struct in6_addr)]; - if (skb->protocol == htons(ETH_P_IPV6)) - sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI6c\n", + &ipv6_hdr(skb)->saddr); else if (skb->protocol == htons(ETH_P_IP)) - sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI4\n", + &ip_hdr(skb)->saddr); else - sprintf(saddr, "unknown"); + pr_warn_ratelimited("bad ICRC from unknown\n"); - pr_warn_ratelimited("bad ICRC from %s\n", saddr); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 22bd9630dcd9..73d4a97603a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,10 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; + + rxe_add_ref(qp); next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -697,6 +699,7 @@ next_wqe: wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; __rxe_do_task(&qp->comp.task); + rxe_drop_ref(qp); return 0; } payload = mtu; @@ -719,7 +722,7 @@ next_wqe: * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -727,7 +730,7 @@ next_wqe: qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); @@ -756,8 +759,7 @@ err: */ wqe->wr.send_flags |= IB_SEND_SIGNALED; __rxe_do_task(&qp->comp.task); - return -EAGAIN; - exit: + rxe_drop_ref(qp); return -EAGAIN; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index dd3d88adc003..7a36ec9dbc0c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -444,6 +444,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } + /* A zero-byte op is not required to set an addr or rkey. */ + if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + (pkt->mask & RXE_RETH_MASK) && + reth_len(pkt) == 0) { + return RESPST_EXECUTE; + } + va = qp->resp.va; rkey = qp->resp.rkey; resid = qp->resp.resid; @@ -680,9 +687,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, res->read.va_org = qp->resp.va; res->first_psn = req_pkt->psn; - res->last_psn = req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1; + + if (reth_len(req_pkt)) { + res->last_psn = (req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1) & BTH_PSN_MASK; + } else { + res->last_psn = res->first_psn; + } res->cur_psn = req_pkt->psn; res->read.resid = qp->resp.resid; @@ -742,7 +754,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } else { qp->resp.res = NULL; qp->resp.opcode = -1; - qp->resp.psn = res->cur_psn; + if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) + qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; } @@ -1132,6 +1145,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, pkt, skb_copy); if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + rxe_drop_ref(qp); kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; @@ -1198,6 +1212,8 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; + rxe_add_ref(qp); + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; if (!qp->valid) { @@ -1386,5 +1402,6 @@ int rxe_responder(void *arg) exit: ret = -EAGAIN; done: + rxe_drop_ref(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 2a6e3cd2d4e8..efc832a2d7c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -169,7 +169,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, } } - err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + err = rxe_queue_resize(q, &attr->max_wr, rcv_wqe_size(srq->rq.max_sge), srq->rq.queue->ip ? srq->rq.queue->ip->context : diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 1e19bf828a6e..d2a14a1bdc7f 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -121,6 +121,7 @@ int rxe_init_task(void *obj, struct rxe_task *task, task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); + task->destroyed = false; tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); @@ -132,11 +133,29 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { + unsigned long flags; + bool idle; + + /* + * Mark the task, then wait for it to finish. It might be + * running in a non-tasklet (direct call) context. + */ + task->destroyed = true; + + do { + spin_lock_irqsave(&task->state_lock, flags); + idle = (task->state == TASK_STATE_START); + spin_unlock_irqrestore(&task->state_lock, flags); + } while (!idle); + tasklet_kill(&task->tasklet); } void rxe_run_task(struct rxe_task *task, int sched) { + if (task->destroyed) + return; + if (sched) tasklet_schedule(&task->tasklet); else diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index d14aa6daed05..08ff42d451c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -54,6 +54,7 @@ struct rxe_task { int (*func)(void *arg); int ret; char name[16]; + bool destroyed; }; /* diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..beb7021ff18a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, return err; } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) + { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); @@ -564,7 +566,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, if (udata) { if (udata->inlen) { err = -EINVAL; - goto err1; + goto err2; } qp->is_user = 1; } @@ -573,12 +575,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); if (err) - goto err2; + goto err3; return &qp->ibqp; -err2: +err3: rxe_drop_index(qp); +err2: rxe_drop_ref(qp); err1: return ERR_PTR(err); @@ -1007,11 +1010,19 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); + unsigned long irq_flags; + int ret = 0; + spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - return 0; + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + ret = 1; + + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); + + return ret; } static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 339a1eecdfe3..096c4f6fbd65 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -357,11 +357,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int i; rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring); - if (!rx->rx_ring) { - printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", - priv->ca->name, ipoib_recvq_size); + if (!rx->rx_ring) return -ENOMEM; - } t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { @@ -1054,8 +1051,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", - priv->ca->name); attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } @@ -1134,7 +1129,6 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { - ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } @@ -1550,8 +1544,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { - printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", - priv->ca->name, ipoib_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 830fecb6934c..5038f9d2d753 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -416,11 +416,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC); - if (!qp_work) { - ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n", - __func__, priv->qp->qp_num); + if (!qp_work) return; - } INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work); qp_work->priv = priv; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c50794fb92db..3ce0765a05ab 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1619,11 +1619,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); - if (!priv->rx_ring) { - printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", - ca->name, ipoib_recvq_size); + if (!priv->rx_ring) goto out; - } priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1909dd252c94..fddff403d5d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; - if (ib_query_port(priv->ca, priv->port, &port_attr) || - port_attr.state != IB_PORT_ACTIVE) { + if (ib_query_port(priv->ca, priv->port, &port_attr)) { + ipoib_dbg(priv, "ib_query_port() failed\n"); + return; + } + if (port_attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", port_attr.state); return; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a4b791dfaa1d..8ae7a3beddb7 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -890,11 +890,14 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_ESTABLISHED: iser_connected_handler(cma_id, event->param.conn.private_data); break; + case RDMA_CM_EVENT_REJECTED: + iser_info("Connection rejected: %s\n", + rdma_reject_msg(cma_id, event->status)); + /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - case RDMA_CM_EVENT_REJECTED: iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6dd43f63238e..314e95516068 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -184,7 +184,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!isert_conn->rx_descs) - goto fail; + return -ENOMEM; rx_desc = isert_conn->rx_descs; @@ -213,9 +213,7 @@ dma_map_fail: } kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; -fail: isert_err("conn %p failed to allocate rx descriptors\n", isert_conn); - return -ENOMEM; } @@ -269,10 +267,8 @@ isert_alloc_comps(struct isert_device *device) device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), GFP_KERNEL); - if (!device->comps) { - isert_err("Unable to allocate completion contexts\n"); + if (!device->comps) return -ENOMEM; - } max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe); @@ -432,10 +428,8 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf), GFP_KERNEL); - if (!isert_conn->login_req_buf) { - isert_err("Unable to allocate isert_conn->login_buf\n"); + if (!isert_conn->login_req_buf) return -ENOMEM; - } isert_conn->login_req_dma = ib_dma_map_single(ib_dev, isert_conn->login_req_buf, @@ -795,6 +789,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) */ return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + isert_info("Connection rejected: %s\n", + rdma_reject_msg(cma_id, event->status)); case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: ret = isert_connect_error(cma_id); @@ -1276,11 +1272,8 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd if (payload_length) { text_in = kzalloc(payload_length, GFP_KERNEL); - if (!text_in) { - isert_err("Unable to allocate text_in of payload_length: %u\n", - payload_length); + if (!text_in) return -ENOMEM; - } } cmd->text_in_ptr = text_in; @@ -1851,6 +1844,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, (void *)cmd->sense_buffer, pdu_len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = pdu_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; @@ -1978,6 +1973,8 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, (void *)cmd->buf_ptr, ISCSI_HDR_LEN, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = ISCSI_HDR_LEN; tx_dsg->addr = isert_cmd->pdu_buf_dma; tx_dsg->length = ISCSI_HDR_LEN; @@ -2018,6 +2015,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, txt_rsp_buf, txt_rsp_len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = txt_rsp_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; @@ -2307,10 +2306,9 @@ isert_setup_np(struct iscsi_np *np, int ret; isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL); - if (!isert_np) { - isert_err("Unable to allocate struct isert_np\n"); + if (!isert_np) return -ENOMEM; - } + sema_init(&isert_np->sem, 0); mutex_init(&isert_np->mutex); INIT_LIST_HEAD(&isert_np->accepted); @@ -2651,7 +2649,6 @@ static int __init isert_init(void) WQ_UNBOUND | WQ_HIGHPRI, 0); if (!isert_comp_wq) { isert_err("Unable to allocate isert_comp_wq\n"); - ret = -ENOMEM; return -ENOMEM; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d980fb458ad4..8ddc07123193 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -64,6 +64,11 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); MODULE_INFO(release_date, DRV_RELDATE); +#if !defined(CONFIG_DYNAMIC_DEBUG) +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) +#define DYNAMIC_DEBUG_BRANCH(descriptor) false +#endif + static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; @@ -384,6 +389,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); + if (ret == -ENOMEM) + pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", + dev_name(&device->dev)); goto destroy_pool; } d->mr = mr; @@ -1266,8 +1274,12 @@ static int srp_map_finish_fmr(struct srp_map_state *state, struct ib_pool_fmr *fmr; u64 io_addr = 0; - if (state->fmr.next >= state->fmr.end) + if (state->fmr.next >= state->fmr.end) { + shost_printk(KERN_ERR, ch->target->scsi_host, + PFX "Out of MRs (mr_per_cmd = %d)\n", + ch->target->mr_per_cmd); return -ENOMEM; + } WARN_ON_ONCE(!dev->use_fmr); @@ -1323,8 +1335,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, u32 rkey; int n, err; - if (state->fr.next >= state->fr.end) + if (state->fr.next >= state->fr.end) { + shost_printk(KERN_ERR, ch->target->scsi_host, + PFX "Out of MRs (mr_per_cmd = %d)\n", + ch->target->mr_per_cmd); return -ENOMEM; + } WARN_ON_ONCE(!dev->use_fast_reg); @@ -1556,7 +1572,6 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, return 0; } -#if defined(DYNAMIC_DATA_DEBUG) static void srp_check_mapping(struct srp_map_state *state, struct srp_rdma_ch *ch, struct srp_request *req, struct scatterlist *scat, int count) @@ -1580,7 +1595,6 @@ static void srp_check_mapping(struct srp_map_state *state, scsi_bufflen(req->scmnd), desc_len, mr_len, state->ndesc, state->nmdesc); } -#endif /** * srp_map_data() - map SCSI data buffer onto an SRP request @@ -1669,14 +1683,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, if (ret < 0) goto unmap; -#if defined(DYNAMIC_DEBUG) { DEFINE_DYNAMIC_DEBUG_METADATA(ddm, "Memory mapping consistency check"); - if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT)) + if (DYNAMIC_DEBUG_BRANCH(ddm)) srp_check_mapping(&state, ch, req, scat, count); } -#endif /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this @@ -3287,7 +3299,9 @@ static ssize_t srp_create_target(struct device *dev, */ scsi_host_get(target->scsi_host); - mutex_lock(&host->add_target_mutex); + ret = mutex_lock_interruptible(&host->add_target_mutex); + if (ret < 0) + goto put; ret = srp_parse_options(buf, target); if (ret) @@ -3443,6 +3457,7 @@ connected: out: mutex_unlock(&host->add_target_mutex); +put: scsi_host_put(target->scsi_host); if (ret < 0) scsi_host_put(target->scsi_host); @@ -3526,6 +3541,7 @@ free_host: static void srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; + struct ib_device_attr *attr = &device->attrs; struct srp_host *host; int mr_page_shift, p; u64 max_pages_per_mr; @@ -3540,25 +3556,25 @@ static void srp_add_one(struct ib_device *device) * minimum of 4096 bytes. We're unlikely to build large sglists * out of smaller entries. */ - mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1); + mr_page_shift = max(12, ffs(attr->page_size_cap) - 1); srp_dev->mr_page_size = 1 << mr_page_shift; srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); - max_pages_per_mr = device->attrs.max_mr_size; + max_pages_per_mr = attr->max_mr_size; do_div(max_pages_per_mr, srp_dev->mr_page_size); pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, - device->attrs.max_mr_size, srp_dev->mr_page_size, + attr->max_mr_size, srp_dev->mr_page_size, max_pages_per_mr, SRP_MAX_PAGES_PER_MR); srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && device->map_phys_fmr && device->unmap_fmr); - srp_dev->has_fr = (device->attrs.device_cap_flags & + srp_dev->has_fr = (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { dev_warn(&device->dev, "neither FMR nor FR is supported\n"); } else if (!never_register && - device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) { + attr->max_mr_size >= 2 * srp_dev->mr_page_size) { srp_dev->use_fast_reg = (srp_dev->has_fr && (!srp_dev->has_fmr || prefer_fr)); srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; @@ -3571,13 +3587,13 @@ static void srp_add_one(struct ib_device *device) if (srp_dev->use_fast_reg) { srp_dev->max_pages_per_mr = min_t(u32, srp_dev->max_pages_per_mr, - device->attrs.max_fast_reg_page_list_len); + attr->max_fast_reg_page_list_len); } srp_dev->mr_max_size = srp_dev->mr_page_size * srp_dev->max_pages_per_mr; pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", - device->name, mr_page_shift, device->attrs.max_mr_size, - device->attrs.max_fast_reg_page_list_len, + device->name, mr_page_shift, attr->max_mr_size, + attr->max_fast_reg_page_list_len, srp_dev->max_pages_per_mr, srp_dev->mr_max_size); INIT_LIST_HEAD(&srp_dev->dev_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0b1f69ed2e92..d21ba9d857c3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1840,7 +1840,6 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, struct srpt_rdma_ch *ch, *tmp_ch; u32 it_iu_len; int i, ret = 0; - unsigned char *p; WARN_ON_ONCE(irqs_disabled()); @@ -1994,21 +1993,18 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, be64_to_cpu(*(__be64 *)(ch->i_port_id + 8))); pr_debug("registering session %s\n", ch->sess_name); - p = &ch->sess_name[0]; -try_again: ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0, - TARGET_PROT_NORMAL, p, ch, NULL); + TARGET_PROT_NORMAL, ch->sess_name, ch, + NULL); + /* Retry without leading "0x" */ + if (IS_ERR(ch->sess)) + ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0, + TARGET_PROT_NORMAL, + ch->sess_name + 2, ch, NULL); if (IS_ERR(ch->sess)) { - pr_info("Rejected login because no ACL has been" - " configured yet for initiator %s.\n", p); - /* - * XXX: Hack to retry of ch->i_port_id without leading '0x' - */ - if (p == &ch->sess_name[0]) { - p += 2; - goto try_again; - } + pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n", + ch->sess_name); rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ? SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); |