diff options
Diffstat (limited to 'drivers/infiniband')
181 files changed, 11641 insertions, 2638 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index fb3fb89640e5..670917387eda 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -73,6 +73,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" +source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 4fa524dfb6cf..11dacd97a667 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -156,7 +156,6 @@ int ib_agent_port_open(struct ib_device *device, int port_num) /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { - dev_err(&device->dev, "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 1a2984c28b95..ae04826e82fc 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -770,12 +770,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev) int err = 0; table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); - - if (!table) { - pr_warn("failed to allocate ib gid cache for %s\n", - ib_dev->name); + if (!table) return -ENOMEM; - } for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); @@ -1170,14 +1166,13 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - pr_warn("Couldn't allocate cache for %s\n", device->name); - return -ENOMEM; + err = -ENOMEM; + goto free; } err = gid_table_setup_one(device); if (err) - /* Allocated memory will be cleaned in the release function */ - return err; + goto free; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); @@ -1192,6 +1187,9 @@ int ib_cache_setup_one(struct ib_device *device) err: gid_table_cleanup_one(device); +free: + kfree(device->cache.pkey_cache); + kfree(device->cache.lmc_cache); return err; } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 71c7c4c328ef..cf1edfa1cbac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -57,6 +57,54 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +static const char * const ibcm_rej_reason_strs[] = { + [IB_CM_REJ_NO_QP] = "no QP", + [IB_CM_REJ_NO_EEC] = "no EEC", + [IB_CM_REJ_NO_RESOURCES] = "no resources", + [IB_CM_REJ_TIMEOUT] = "timeout", + [IB_CM_REJ_UNSUPPORTED] = "unsupported", + [IB_CM_REJ_INVALID_COMM_ID] = "invalid comm ID", + [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance", + [IB_CM_REJ_INVALID_SERVICE_ID] = "invalid service ID", + [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type", + [IB_CM_REJ_STALE_CONN] = "stale conn", + [IB_CM_REJ_RDC_NOT_EXIST] = "RDC not exist", + [IB_CM_REJ_INVALID_GID] = "invalid GID", + [IB_CM_REJ_INVALID_LID] = "invalid LID", + [IB_CM_REJ_INVALID_SL] = "invalid SL", + [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class", + [IB_CM_REJ_INVALID_HOP_LIMIT] = "invalid hop limit", + [IB_CM_REJ_INVALID_PACKET_RATE] = "invalid packet rate", + [IB_CM_REJ_INVALID_ALT_GID] = "invalid alt GID", + [IB_CM_REJ_INVALID_ALT_LID] = "invalid alt LID", + [IB_CM_REJ_INVALID_ALT_SL] = "invalid alt SL", + [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class", + [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit", + [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate", + [IB_CM_REJ_PORT_CM_REDIRECT] = "port CM redirect", + [IB_CM_REJ_PORT_REDIRECT] = "port redirect", + [IB_CM_REJ_INVALID_MTU] = "invalid MTU", + [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources", + [IB_CM_REJ_CONSUMER_DEFINED] = "consumer defined", + [IB_CM_REJ_INVALID_RNR_RETRY] = "invalid RNR retry", + [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID", + [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version", + [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label", + [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label", +}; + +const char *__attribute_const__ ibcm_reject_msg(int reason) +{ + size_t index = reason; + + if (index < ARRAY_SIZE(ibcm_rej_reason_strs) && + ibcm_rej_reason_strs[index]) + return ibcm_rej_reason_strs[index]; + else + return "unrecognized reason"; +} +EXPORT_SYMBOL(ibcm_reject_msg); + static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); @@ -1582,6 +1630,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; + struct ib_cm_id *cm_id; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1603,10 +1652,18 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); + if (cur_cm_id_priv) { + cm_id = &cur_cm_id_priv->id; + ib_send_cm_dreq(cm_id, NULL, 0); + cm_deref_id(cur_cm_id_priv); + } return NULL; } @@ -1984,6 +2041,9 @@ static int cm_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; int ret; + struct cm_id_private *cur_cm_id_priv; + struct ib_cm_id *cm_id; + struct cm_timewait_info *timewait_info; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); @@ -2018,16 +2078,26 @@ static int cm_rep_handler(struct cm_work *work) goto error; } /* Check for a stale connection. */ - if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); + if (timewait_info) { rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; + if (cur_cm_id_priv) { + cm_id = &cur_cm_id_priv->id; + ib_send_cm_dreq(cm_id, NULL, 0); + cm_deref_id(cur_cm_id_priv); + } + goto error; } spin_unlock(&cm.lock); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 22fcf284dd8b..e7dcfac877ca 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -101,6 +101,49 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) } EXPORT_SYMBOL(rdma_event_msg); +const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, + int reason) +{ + if (rdma_ib_or_roce(id->device, id->port_num)) + return ibcm_reject_msg(reason); + + if (rdma_protocol_iwarp(id->device, id->port_num)) + return iwcm_reject_msg(reason); + + WARN_ON_ONCE(1); + return "unrecognized transport"; +} +EXPORT_SYMBOL(rdma_reject_msg); + +bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) +{ + if (rdma_ib_or_roce(id->device, id->port_num)) + return reason == IB_CM_REJ_CONSUMER_DEFINED; + + if (rdma_protocol_iwarp(id->device, id->port_num)) + return reason == -ECONNREFUSED; + + WARN_ON_ONCE(1); + return false; +} +EXPORT_SYMBOL(rdma_is_consumer_reject); + +const void *rdma_consumer_reject_data(struct rdma_cm_id *id, + struct rdma_cm_event *ev, u8 *data_len) +{ + const void *p; + + if (rdma_is_consumer_reject(id, ev->status)) { + *data_len = ev->param.conn.private_data_len; + p = ev->param.conn.private_data; + } else { + *data_len = 0; + p = NULL; + } + return p; +} +EXPORT_SYMBOL(rdma_consumer_reject_data); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0c0bea091de8..d29372624f3a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -72,9 +72,6 @@ void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); - typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 760ef603a468..571974cd3919 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -254,11 +254,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client unsigned long flags; context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) { - pr_warn("Couldn't allocate client context for %s/%s\n", - device->name, client->name); + if (!context) return -ENOMEM; - } context->client = client; context->data = NULL; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index cdbb1f1a6d97..cdfad5f26212 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -247,7 +247,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { - pr_warn(PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 5495e22839a7..31661b5c1743 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -59,6 +59,27 @@ MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); +static const char * const iwcm_rej_reason_strs[] = { + [ECONNRESET] = "reset by remote host", + [ECONNREFUSED] = "refused by remote application", + [ETIMEDOUT] = "setup timeout", +}; + +const char *__attribute_const__ iwcm_reject_msg(int reason) +{ + size_t index; + + /* iWARP uses negative errnos */ + index = -reason; + + if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && + iwcm_rej_reason_strs[index]) + return iwcm_rej_reason_strs[index]; + else + return "unrecognized reason"; +} +EXPORT_SYMBOL(iwcm_reject_msg); + static struct ibnl_client_cbs iwcm_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 1c41b95cefec..a0e7c16d8bd8 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -604,7 +604,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) } rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); if (!rem_info) { - pr_err("%s: Unable to allocate a remote info\n", __func__); ret = -ENOMEM; return ret; } diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index ade71e7f0131..3ef51a96bbf1 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -62,7 +62,6 @@ int iwpm_init(u8 nl_client) sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { ret = -ENOMEM; - pr_err("%s Unable to create mapinfo hash table\n", __func__); goto init_exit; } iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * @@ -70,7 +69,6 @@ int iwpm_init(u8 nl_client) if (!iwpm_reminfo_bucket) { kfree(iwpm_hash_bucket); ret = -ENOMEM; - pr_err("%s Unable to create reminfo hash table\n", __func__); goto init_exit; } } @@ -128,10 +126,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, if (!iwpm_valid_client(nl_client)) return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); - if (!map_info) { - pr_err("%s: Unable to allocate a mapping info\n", __func__); + if (!map_info) return -ENOMEM; - } + memcpy(&map_info->local_sockaddr, local_sockaddr, sizeof(struct sockaddr_storage)); memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, @@ -309,10 +306,9 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, unsigned long flags; nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); - if (!nlmsg_request) { - pr_err("%s Unable to allocate a nlmsg_request\n", __func__); + if (!nlmsg_request) return NULL; - } + spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 40cbd6bdb73b..a009f7132c73 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; if ((opa_get_smp_direction(opa_smp) @@ -816,7 +816,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { ret = -ENOMEM; - dev_err(&device->dev, "No memory for ib_mad_local_private\n"); goto out; } local->mad_priv = NULL; @@ -824,7 +823,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; - dev_err(&device->dev, "No memory for local response MAD\n"); kfree(local); goto out; } @@ -947,9 +945,6 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); if (!seg) { - dev_err(&send_buf->mad_agent->device->dev, - "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", - sizeof (*seg) + seg_size, gfp_mask); free_send_rmpp_list(send_wr); return -ENOMEM; } @@ -1362,12 +1357,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method) { /* Allocate management method table */ *method = kzalloc(sizeof **method, GFP_ATOMIC); - if (!*method) { - pr_err("No memory for ib_mad_mgmt_method_table\n"); - return -ENOMEM; - } - - return 0; + return (*method) ? 0 : (-ENOMEM); } /* @@ -1458,8 +1448,6 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } @@ -1524,22 +1512,16 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); - if (!vendor) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class_table\n"); + if (!vendor) goto error1; - } *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); - if (!vendor_class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class\n"); + if (!vendor_class) goto error2; - } (*vendor_table)->vendor_class[vclass] = vendor_class; } @@ -1746,7 +1728,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; @@ -2167,7 +2149,7 @@ handle_smi(struct ib_mad_port_private *port_priv, struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && - mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + mad_hdr->class_version == OPA_SM_CLASS_VERSION) return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, response); @@ -2238,11 +2220,8 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); - if (!response) { - dev_err(&port_priv->device->dev, - "%s: no memory for response buffer\n", __func__); + if (!response) goto out; - } if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; @@ -2869,8 +2848,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), GFP_ATOMIC); if (!mad_priv) { - dev_err(&qp_info->port_priv->device->dev, - "No memory for receive buffer\n"); ret = -ENOMEM; break; } @@ -2961,11 +2938,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { - dev_err(&port_priv->device->dev, - "Couldn't kmalloc ib_qp_attr\n"); + if (!attr) return -ENOMEM; - } ret = ib_find_pkey(port_priv->device, port_priv->port_num, IB_DEFAULT_PKEY_FULL, &pkey_index); @@ -3135,10 +3109,8 @@ static int ib_mad_port_open(struct ib_device *device, /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); - if (!port_priv) { - dev_err(&device->dev, "No memory for ib_mad_port_private\n"); + if (!port_priv) return -ENOMEM; - } port_priv->device = device; port_priv->port_num = port_num; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index e51b739f6ea3..322cb67b07a9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, process_join_error(group, status); else { int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 3a64a0881882..0621f4455732 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -304,10 +304,9 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, for_ifa(in_dev) { struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) { - pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n"); + if (!entry) continue; - } + entry->ip.sin_family = AF_INET; entry->ip.sin_addr.s_addr = ifa->ifa_address; list_add_tail(&entry->list, &sin_list); @@ -348,10 +347,8 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev, list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) { - pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n"); + if (!entry) continue; - } entry->sin6.sin6_family = AF_INET6; entry->sin6.sin6_addr = ifp->addr; @@ -447,10 +444,8 @@ static int netdev_upper_walk(struct net_device *upper, void *data) struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); struct list_head *upper_list = data; - if (!entry) { - pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); + if (!entry) return 0; - } list_add_tail(&entry->list, upper_list); dev_hold(upper); @@ -559,10 +554,8 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, struct netdev_event_work *ndev_work = kmalloc(sizeof(*ndev_work), GFP_KERNEL); - if (!ndev_work) { - pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n"); + if (!ndev_work) return NOTIFY_DONE; - } memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds)); for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) { @@ -696,10 +689,8 @@ static int addr_event(struct notifier_block *this, unsigned long event, } work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); + if (!work) return NOTIFY_DONE; - } INIT_WORK(&work->work, update_gid_event_work_handler); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 7713ef089c3c..579f9a7f6283 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1104,8 +1104,11 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9520154f1d7c..e12f8faf8c23 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1584,8 +1584,11 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 84b4eff90395..1e62a5f0cb28 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -51,7 +51,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d if (umem->nmap > 0) ib_dma_unmap_sg(dev, umem->sg_head.sgl, - umem->nmap, + umem->npages, DMA_BIDIRECTIONAL); for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 1f0fe3217f23..6b079a31dced 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -578,7 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages_remote(owning_process, owning_mm, user_virt, gup_num_pages, - flags, local_page_list, NULL); + flags, local_page_list, NULL, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index df26a741cda6..455034ac994e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -289,5 +289,6 @@ IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(modify_qp); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cb3f515a2285..09b649159e6c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2328,94 +2328,88 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) } } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int modify_qp(struct ib_uverbs_file *file, + struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) { - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + struct ib_qp_attr *attr; + struct ib_qp *qp; + int ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.alt_pkey_index; - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = cmd.alt_timeout; - - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + attr->qp_state = cmd->base.qp_state; + attr->cur_qp_state = cmd->base.cur_qp_state; + attr->path_mtu = cmd->base.path_mtu; + attr->path_mig_state = cmd->base.path_mig_state; + attr->qkey = cmd->base.qkey; + attr->rq_psn = cmd->base.rq_psn; + attr->sq_psn = cmd->base.sq_psn; + attr->dest_qp_num = cmd->base.dest_qp_num; + attr->qp_access_flags = cmd->base.qp_access_flags; + attr->pkey_index = cmd->base.pkey_index; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + attr->max_rd_atomic = cmd->base.max_rd_atomic; + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + attr->min_rnr_timer = cmd->base.min_rnr_timer; + attr->port_num = cmd->base.port_num; + attr->timeout = cmd->base.timeout; + attr->retry_cnt = cmd->base.retry_cnt; + attr->rnr_retry = cmd->base.rnr_retry; + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->rate_limit = cmd->rate_limit; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class; + attr->ah_attr.dlid = cmd->base.dest.dlid; + attr->ah_attr.sl = cmd->base.dest.sl; + attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits; + attr->ah_attr.static_rate = cmd->base.dest.static_rate; + attr->ah_attr.ah_flags = cmd->base.dest.is_global ? + IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd->base.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd->base.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ? + IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); - if (ret) - goto release_qp; + if (cmd->base.attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto release_qp; + } ret = qp->device->modify_qp(qp, attr, - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + udata); } else { - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + ret = ib_modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask)); } - if (ret) - goto release_qp; - - ret = in_len; - release_qp: put_qp_read(qp); @@ -2425,6 +2419,68 @@ out: return ret; } +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + struct ib_udata udata; + int ret; + + if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) + return -EFAULT; + + if (cmd.base.attr_mask & + ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, + in_len - sizeof(cmd.base), out_len); + + ret = modify_qp(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + +int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + int ret; + + /* + * Last bit is reserved for extending the attr_mask by + * using another field. + */ + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + + if (ucore->inlen < sizeof(cmd.base)) + return -EINVAL; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.base.attr_mask & + ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + if (ucore->inlen > sizeof(cmd)) { + if (ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + } + + ret = modify_qp(file, &cmd, uhw); + + return ret; +} + ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -2875,6 +2931,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct ib_ah_attr attr; int ret; + struct ib_udata udata; if (out_len < sizeof resp) return -ENOSPC; @@ -2882,6 +2939,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -2908,12 +2969,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - ah = ib_create_ah(pd, &attr); + ah = pd->device->create_ah(pd, &attr, &udata); + if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } + ah->device = pd->device; + ah->pd = pd; + atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->object = ah; @@ -3124,8 +3189,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); kern_spec_mask = kern_spec_val + kern_filter_sz; + if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL)) + return -EINVAL; - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, @@ -3175,6 +3242,21 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel); + memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) || + (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24)) + return -EINVAL; + break; default: return -EINVAL; } @@ -3745,7 +3827,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = PTR_ERR(flow_id); goto err_free; } - flow_id->qp = qp; flow_id->uobject = uobj; uobj->object = flow_id; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 44b1104eb168..813593550c4b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -137,6 +137,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -746,8 +747,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, int srcu_key; ssize_t ret; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 83687646da68..71580cc28c9e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr); + ah = pd->device->create_ah(pd, ah_attr, NULL); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -328,7 +328,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -static int ib_get_header_version(const union rdma_network_hdr *hdr) +int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; struct iphdr ip4h_checked; @@ -359,6 +359,7 @@ static int ib_get_header_version(const union rdma_network_hdr *hdr) return 4; return 6; } +EXPORT_SYMBOL(ib_get_rdma_header_version); static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u8 port_num, @@ -369,7 +370,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, if (rdma_protocol_ib(device, port_num)) return RDMA_NETWORK_IB; - grh_version = ib_get_header_version((union rdma_network_hdr *)grh); + grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; @@ -415,9 +416,9 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, &context, gid_index); } -static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, - enum rdma_network_type net_type, - union ib_gid *sgid, union ib_gid *dgid) +int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, + enum rdma_network_type net_type, + union ib_gid *sgid, union ib_gid *dgid) { struct sockaddr_in src_in; struct sockaddr_in dst_in; @@ -447,6 +448,7 @@ static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, return -EINVAL; } } +EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, @@ -469,8 +471,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } - ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, - &sgid, &dgid); + ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + &sgid, &dgid); if (ret) return ret; @@ -1014,6 +1016,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } } }, @@ -1047,6 +1050,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } }, [IB_QPS_SQD] = { @@ -1196,66 +1200,66 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_device *device, + struct ib_ah_attr *ah_attr) { int ret = 0; - if (*qp_attr_mask & IB_QP_AV) { - if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || - qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) - return -EINVAL; - - if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) - return 0; - - if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, - qp_attr->ah_attr.dmac); - } else { - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - int ifindex; - int hop_limit; - - ret = ib_query_gid(qp->device, - qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, - &sgid, &sgid_attr); - - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - goto out; - } + if (ah_attr->port_num < rdma_start_port(device) || + ah_attr->port_num > rdma_end_port(device)) + return -EINVAL; - ifindex = sgid_attr.ndev->ifindex; + if (!rdma_cap_eth_ah(device, ah_attr->port_num)) + return 0; - ret = rdma_addr_find_l2_eth_by_grh(&sgid, - &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, - NULL, &ifindex, &hop_limit); + if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { + rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw, + ah_attr->dmac); + } else { + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + int hop_limit; + + ret = ib_query_gid(device, + ah_attr->port_num, + ah_attr->grh.sgid_index, + &sgid, &sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; + goto out; + } - dev_put(sgid_attr.ndev); + ifindex = sgid_attr.ndev->ifindex; - qp_attr->ah_attr.grh.hop_limit = hop_limit; - } + ret = rdma_addr_find_l2_eth_by_grh(&sgid, + &ah_attr->grh.dgid, + ah_attr->dmac, + NULL, &ifindex, &hop_limit); + + dev_put(sgid_attr.ndev); + + ah_attr->grh.hop_limit = hop_limit; } out: return ret; } EXPORT_SYMBOL(ib_resolve_eth_dmac); - int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - int ret; - ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); - if (ret) - return ret; + if (qp_attr_mask & IB_QP_AV) { + int ret; + + ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); + if (ret) + return ret; + } return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } @@ -1734,8 +1738,10 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, return ERR_PTR(-ENOSYS); flow_id = qp->device->create_flow(qp, flow_attr, domain); - if (!IS_ERR(flow_id)) + if (!IS_ERR(flow_id)) { atomic_inc(&qp->usecnt); + flow_id->qp = qp; + } return flow_id; } EXPORT_SYMBOL(ib_create_flow); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index e7a5ed9f6f3f..ed553de2ca12 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ +obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_HNS) += hns/ diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c index 8bca6b4ec9af..445e89e5e7cf 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c +++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c @@ -45,10 +45,9 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag) int size = 32; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; m->len = size; @@ -82,10 +81,9 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift) size = npages * sizeof(u64); m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = pbl_addr; m->len = size; @@ -144,10 +142,9 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents) int rc; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_PMRX; m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; m->len = size; @@ -177,10 +174,9 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid) int rc; m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); - if (!m) { - PDBG("%s couldn't allocate memory.\n", __func__); + if (!m) return; - } + m->mem_id = MEM_CM; m->addr = hwtid * size; m->len = size; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index cba57bb53dba..9d5fe1853da4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -62,7 +62,8 @@ #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4e5baf4fe15e..516b0ae6dc3f 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -828,8 +828,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } rdev->status_page = (struct t4_dev_status_page *) __get_free_page(GFP_KERNEL); - if (!rdev->status_page) + if (!rdev->status_page) { + err = -ENOMEM; goto destroy_ocqp_pool; + } rdev->status_page->qp_start = rdev->lldi.vr->qp.start; rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; @@ -841,8 +843,6 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) if (rdev->wr_log) { rdev->wr_log_size = 1 << c4iw_wr_log_size_order; atomic_set(&rdev->wr_log_idx, 0); - } else { - pr_err(MOD "error allocating wr_log. Logging disabled\n"); } } @@ -1424,8 +1424,6 @@ static void recover_queues(struct uld_ctx *ctx) qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); if (!qp_list.qps) { - printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", - pci_name(ctx->lldi.pdev)); spin_unlock_irq(&ctx->dev->lock); return; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c5..49b51b7e0fd7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -59,7 +59,9 @@ module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 67ea85a56945..7a3d906b3671 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -125,6 +125,7 @@ int node_affinity_init(void) cpumask_weight(topology_sibling_cpumask( cpumask_first(&node_affinity.proc.mask) )); + node_affinity.num_possible_nodes = num_possible_nodes(); node_affinity.num_online_nodes = num_online_nodes(); node_affinity.num_online_cpus = num_online_cpus(); @@ -135,7 +136,7 @@ int node_affinity_init(void) */ init_real_cpu_mask(); - hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes, sizeof(*hfi1_per_node_cntr), GFP_KERNEL); if (!hfi1_per_node_cntr) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 42e63316afd1..e78c7aa094e0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -70,14 +70,6 @@ struct cpu_mask_set { uint gen; }; -struct hfi1_affinity { - struct cpu_mask_set def_intr; - struct cpu_mask_set rcv_intr; - struct cpumask real_cpu_mask; - /* spin lock to protect affinity struct */ - spinlock_t lock; -}; - struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ @@ -115,6 +107,7 @@ struct hfi1_affinity_node_list { struct cpumask real_cpu_mask; struct cpu_mask_set proc; int num_core_siblings; + int num_possible_nodes; int num_online_nodes; int num_online_cpus; struct mutex lock; /* protects affinity nodes */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 24d0820873cf..ef72bc2a9e1d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8477,7 +8477,10 @@ static int do_8051_command( */ if (type == HCMD_WRITE_LCB_CSR) { in_data |= ((*out_data) & 0xffffffffffull) << 8; - reg = ((((*out_data) >> 40) & 0xff) << + /* must preserve COMPLETED - it is tied to hardware */ + reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0); + reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK; + reg |= ((((*out_data) >> 40) & 0xff) << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT) | ((((*out_data) >> 48) & 0xffff) << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); @@ -9556,11 +9559,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd) if (HFI1_CAP_IS_KSET(EXTENDED_PSN)) add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK); - guid = ppd->guid; + guid = ppd->guids[HFI1_PORT_GUID_INDEX]; if (!guid) { if (dd->base_guid) guid = dd->base_guid + ppd->port - 1; - ppd->guid = guid; + ppd->guids[HFI1_PORT_GUID_INDEX] = guid; } /* Set linkinit_reason on power up per OPA spec */ diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5b9993899789..5bfa839d1c48 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -415,6 +415,9 @@ #define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32 #define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0 #define ASIC_CFG_SCRATCH (ASIC + 0x000000000020) +#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08) +#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10) +#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18) #define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050) #define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308) #define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 632ba21759ab..8725f4c086cf 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, return ret; } +/* read the dc8051 memory */ +static ssize_t dc8051_memory_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + ssize_t rval; + void *tmp; + loff_t start, end; + + /* the checks below expect the position to be positive */ + if (*ppos < 0) + return -EINVAL; + + tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* + * Fill in the requested portion of the temporary buffer from the + * 8051 memory. The 8051 memory read is done in terms of 8 bytes. + * Adjust start and end to fit. Skip reading anything if out of + * range. + */ + start = *ppos & ~0x7; /* round down */ + if (start < DC8051_DATA_MEM_SIZE) { + end = (*ppos + count + 7) & ~0x7; /* round up */ + if (end > DC8051_DATA_MEM_SIZE) + end = DC8051_DATA_MEM_SIZE; + rval = read_8051_data(ppd->dd, start, end - start, + (u64 *)(tmp + start)); + if (rval) + goto done; + } + + rval = simple_read_from_buffer(buf, count, ppos, tmp, + DC8051_DATA_MEM_SIZE); +done: + kfree(tmp); + return rval; +} + +static ssize_t debugfs_lcb_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off; + u64 data; + + if (*ppos < 0) + return -EINVAL; + /* only read 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (read_lcb_csr(dd, csr_off, (u64 *)&data)) + break; /* failed */ + if (put_user(data, (unsigned long __user *)(buf + total))) + break; + } + *ppos += total; + return total; +} + +static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off, data; + + if (*ppos < 0) + return -EINVAL; + /* only write 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (get_user(data, (unsigned long __user *)(buf + total))) + break; + if (write_lcb_csr(dd, csr_off, data)) + break; /* failed */ + } + *ppos += total; + return total; +} + /* * read the per-port QSFP data for ppd */ @@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), + DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), + DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), }; static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c5efff29c147..4fbaee68012b 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -795,8 +795,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index e70c223801b4..26da124c88e2 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -207,6 +207,40 @@ done_asic: /* magic character sequence that trails an image */ #define IMAGE_TRAIL_MAGIC "egamiAPO" +/* EPROM file types */ +#define HFI1_EFT_PLATFORM_CONFIG 2 + +/* segment size - 128 KiB */ +#define SEG_SIZE (128 * 1024) + +struct hfi1_eprom_footer { + u32 oprom_size; /* size of the oprom, in bytes */ + u16 num_table_entries; + u16 version; /* version of this footer */ + u32 magic; /* must be last */ +}; + +struct hfi1_eprom_table_entry { + u32 type; /* file type */ + u32 offset; /* file offset from start of EPROM */ + u32 size; /* file size, in bytes */ +}; + +/* + * Calculate the max number of table entries that will fit within a directory + * buffer of size 'dir_size'. + */ +#define MAX_TABLE_ENTRIES(dir_size) \ + (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \ + sizeof(struct hfi1_eprom_table_entry)) + +#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \ + (sizeof(struct hfi1_eprom_table_entry) * (n))) + +#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a)) +#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm') +#define FOOTER_VERSION 1 + /* * Read all of partition 1. The actual file is at the front. Adjust * the returned size if a trailing image magic is found. @@ -242,6 +276,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, } /* + * The segment magic has been checked. There is a footer and table of + * contents present. + * + * directory is a u32 aligned buffer of size EP_PAGE_SIZE. + */ +static int read_segment_platform_config(struct hfi1_devdata *dd, + void *directory, void **data, u32 *size) +{ + struct hfi1_eprom_footer *footer; + struct hfi1_eprom_table_entry *table; + struct hfi1_eprom_table_entry *entry; + void *buffer = NULL; + void *table_buffer = NULL; + int ret, i; + u32 directory_size; + u32 seg_base, seg_offset; + u32 bytes_available, ncopied, to_copy; + + /* the footer is at the end of the directory */ + footer = (struct hfi1_eprom_footer *) + (directory + EP_PAGE_SIZE - sizeof(*footer)); + + /* make sure the structure version is supported */ + if (footer->version != FOOTER_VERSION) + return -EINVAL; + + /* oprom size cannot be larger than a segment */ + if (footer->oprom_size >= SEG_SIZE) + return -EINVAL; + + /* the file table must fit in a segment with the oprom */ + if (footer->num_table_entries > + MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size)) + return -EINVAL; + + /* find the file table start, which precedes the footer */ + directory_size = DIRECTORY_SIZE(footer->num_table_entries); + if (directory_size <= EP_PAGE_SIZE) { + /* the file table fits into the directory buffer handed in */ + table = (struct hfi1_eprom_table_entry *) + (directory + EP_PAGE_SIZE - directory_size); + } else { + /* need to allocate and read more */ + table_buffer = kmalloc(directory_size, GFP_KERNEL); + if (!table_buffer) + return -ENOMEM; + ret = read_length(dd, SEG_SIZE - directory_size, + directory_size, table_buffer); + if (ret) + goto done; + table = table_buffer; + } + + /* look for the platform configuration file in the table */ + for (entry = NULL, i = 0; i < footer->num_table_entries; i++) { + if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) { + entry = &table[i]; + break; + } + } + if (!entry) { + ret = -ENOENT; + goto done; + } + + /* + * Sanity check on the configuration file size - it should never + * be larger than 4 KiB. + */ + if (entry->size > (4 * 1024)) { + dd_dev_err(dd, "Bad configuration file size 0x%x\n", + entry->size); + ret = -EINVAL; + goto done; + } + + /* check for bogus offset and size that wrap when added together */ + if (entry->offset + entry->size < entry->offset) { + dd_dev_err(dd, + "Bad configuration file start + size 0x%x+0x%x\n", + entry->offset, entry->size); + ret = -EINVAL; + goto done; + } + + /* allocate the buffer to return */ + buffer = kmalloc(entry->size, GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + goto done; + } + + /* + * Extract the file by looping over segments until it is fully read. + */ + seg_offset = entry->offset % SEG_SIZE; + seg_base = entry->offset - seg_offset; + ncopied = 0; + while (ncopied < entry->size) { + /* calculate data bytes available in this segment */ + + /* start with the bytes from the current offset to the end */ + bytes_available = SEG_SIZE - seg_offset; + /* subtract off footer and table from segment 0 */ + if (seg_base == 0) { + /* + * Sanity check: should not have a starting point + * at or within the directory. + */ + if (bytes_available <= directory_size) { + dd_dev_err(dd, + "Bad configuration file - offset 0x%x within footer+table\n", + entry->offset); + ret = -EINVAL; + goto done; + } + bytes_available -= directory_size; + } + + /* calculate bytes wanted */ + to_copy = entry->size - ncopied; + + /* max out at the available bytes in this segment */ + if (to_copy > bytes_available) + to_copy = bytes_available; + + /* + * Read from the EPROM. + * + * The sanity check for entry->offset is done in read_length(). + * The EPROM offset is validated against what the hardware + * addressing supports. In addition, if the offset is larger + * than the actual EPROM, it silently wraps. It will work + * fine, though the reader may not get what they expected + * from the EPROM. + */ + ret = read_length(dd, seg_base + seg_offset, to_copy, + buffer + ncopied); + if (ret) + goto done; + + ncopied += to_copy; + + /* set up for next segment */ + seg_offset = footer->oprom_size; + seg_base += SEG_SIZE; + } + + /* success */ + ret = 0; + *data = buffer; + *size = entry->size; + +done: + kfree(table_buffer); + if (ret) + kfree(buffer); + return ret; +} + +/* * Read the platform configuration file from the EPROM. * * On success, an allocated buffer containing the data and its size are @@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, * -EBUSY - not able to acquire access to the EPROM * -ENOENT - no recognizable file written * -ENOMEM - buffer could not be allocated + * -EINVAL - invalid EPROM contentents found */ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) { @@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) if (ret) return -EBUSY; - /* read the last page of P0 for the EPROM format magic */ - ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + /* read the last page of the segment for the EPROM format magic */ + ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); if (ret) goto done; - /* last dword of P0 contains a magic indicator */ - if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* last dword of the segment contains a magic value */ + if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) { + /* segment format */ + ret = read_segment_platform_config(dd, directory, data, size); + } else { /* partition format */ ret = read_partition_platform_config(dd, data, size); - goto done; } - /* nothing recognized */ - ret = -ENOENT; - done: release_chip_resource(dd, CR_EPROM); return ret; diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 13db8eb4f4ec..0dd50cdb039a 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1; const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 }; static const u8 all_pcie_serdes_broadcast = 0xe0; +static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { + 0, + SYSTEM_TABLE_MAX, + PORT_TABLE_MAX, + RX_PRESET_TABLE_MAX, + TX_PRESET_TABLE_MAX, + QSFP_ATTEN_TABLE_MAX, + VARIABLE_SETTINGS_TABLE_MAX +}; + /* forwards */ static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, @@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result) u64 reg; int count; - /* start the read at the given address */ - reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) - << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT) - | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK; + /* step 1: set the address, clear enable */ + reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) + << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT; write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg); + /* step 2: enable */ + write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, + reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK); /* wait until ACCESS_COMPLETED is set */ count = 0; @@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd) &dd->pcidev->dev); if (err) { platform_config = NULL; + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); goto done; } dd->platform_config.data = platform_config->data; @@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd) u32 record_idx = 0, table_type = 0, table_length_dwords = 0; int ret = -EINVAL; /* assume failure */ + /* + * For integrated devices that did not fall back to the default file, + * the SI tuning information for active channels is acquired from the + * scratch register bitmap, thus there is no platform config to parse. + * Skip parsing in these situations. + */ + if (is_integrated(dd) && !platform_config_load) + return 0; + if (!dd->platform_config.data) { - dd_dev_info(dd, "%s: Missing config file\n", __func__); + dd_dev_err(dd, "%s: Missing config file\n", __func__); goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd) magic_num = *ptr; ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { - dd_dev_info(dd, "%s: Bad config file\n", __func__); + dd_dev_err(dd, "%s: Bad config file\n", __func__); goto bail; } @@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd) header1 = *ptr; header2 = *(ptr + 1); if (header1 != ~header2) { - dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed validation at offset %ld\n", + __func__, (ptr - (u32 *) + dd->platform_config.data)); goto bail; } @@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd) table_length_dwords; break; default: - dd_dev_info(dd, - "%s: Unknown data table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown data table %d, offset %ld\n", + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd) case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: - dd_dev_info(dd, - "%s: Unknown meta table %d, offset %ld\n", - __func__, table_type, - (ptr - - (u32 *)dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown meta table %d, offset %ld\n", + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = @@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Jump the table */ ptr += table_length_dwords; if (crc != *ptr) { - dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n", - __func__, (ptr - - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n", + __func__, (ptr - + (u32 *)dd->platform_config.data)); goto bail; } /* Jump the CRC DWORD */ @@ -1901,6 +1924,84 @@ bail: return ret; } +static void get_integrated_platform_config_field( + struct hfi1_devdata *dd, + enum platform_config_table_type_encoding table_type, + int field_index, u32 *data) +{ + struct hfi1_pportdata *ppd = dd->pport; + u8 *cache = ppd->qsfp_info.cache; + u32 tx_preset = 0; + + switch (table_type) { + case PLATFORM_CONFIG_SYSTEM_TABLE: + if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX) + *data = ppd->max_power_class; + else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G) + *data = ppd->default_atten; + break; + case PLATFORM_CONFIG_PORT_TABLE: + if (field_index == PORT_TABLE_PORT_TYPE) + *data = ppd->port_type; + else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G) + *data = ppd->local_atten; + else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G) + *data = ppd->remote_atten; + break; + case PLATFORM_CONFIG_RX_PRESET_TABLE: + if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY) + *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >> + QSFP_RX_CDR_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >> + QSFP_RX_EMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >> + QSFP_RX_AMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR) + *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >> + QSFP_RX_CDR_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP) + *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >> + QSFP_RX_EMP_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP) + *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >> + QSFP_RX_AMP_SHIFT; + break; + case PLATFORM_CONFIG_TX_PRESET_TABLE: + if (cache[QSFP_EQ_INFO_OFFS] & 0x4) + tx_preset = ppd->tx_preset_eq; + else + tx_preset = ppd->tx_preset_noeq; + if (field_index == TX_PRESET_TABLE_PRECUR) + *data = (tx_preset & TX_PRECUR_SMASK) >> + TX_PRECUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_ATTN) + *data = (tx_preset & TX_ATTN_SMASK) >> + TX_ATTN_SHIFT; + else if (field_index == TX_PRESET_TABLE_POSTCUR) + *data = (tx_preset & TX_POSTCUR_SMASK) >> + TX_POSTCUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY) + *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >> + QSFP_TX_CDR_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY) + *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >> + QSFP_TX_EQ_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR) + *data = (tx_preset & QSFP_TX_CDR_SMASK) >> + QSFP_TX_CDR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ) + *data = (tx_preset & QSFP_TX_EQ_SMASK) >> + QSFP_TX_EQ_SHIFT; + break; + case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: + case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: + default: + break; + } +} + static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, int field, u32 *field_len_bits, u32 *field_start_bits) @@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd, else return -EINVAL; + if (is_integrated(dd) && !platform_config_load) { + /* + * Use saved configuration from ppd for integrated platforms + */ + get_integrated_platform_config_field(dd, table_type, + field_index, data); + return 0; + } + ret = get_platform_fw_field_metadata(dd, table_type, field_index, &field_len_bits, &field_start_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cc87fd4e534b..751a0fb29fa5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -492,6 +492,9 @@ struct rvt_sge_state; #define HFI1_MIN_VLS_SUPPORTED 1 #define HFI1_MAX_VLS_SUPPORTED 8 +#define HFI1_GUIDS_PER_PORT 5 +#define HFI1_PORT_GUID_INDEX 0 + static inline void incr_cntr64(u64 *cntr) { if (*cntr < (u64)-1LL) @@ -559,11 +562,20 @@ struct hfi1_pportdata { struct kobject vl2mtu_kobj; /* PHY support */ - u32 port_type; struct qsfp_data qsfp_info; + /* Values for SI tuning of SerDes */ + u32 port_type; + u32 tx_preset_eq; + u32 tx_preset_noeq; + u32 rx_preset; + u8 local_atten; + u8 remote_atten; + u8 default_atten; + u8 max_power_class; + + /* GUIDs for this interface, in host order, guids[0] is a port guid */ + u64 guids[HFI1_GUIDS_PER_PORT]; - /* GUID for this interface, in host order */ - u64 guid; /* GUID for peer interface, in host order */ u64 neighbor_guid; @@ -826,32 +838,29 @@ struct hfi1_devdata { u8 __iomem *kregend; /* physical address of chip for io_remap, etc. */ resource_size_t physaddr; - /* receive context data */ - struct hfi1_ctxtdata **rcd; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ + struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ struct send_context_info *send_contexts; /* map hardware send contexts to software index */ u8 *hw_to_sw; /* spinlock for allocating and releasing send context resources */ spinlock_t sc_lock; - /* Per VL data. Enough for all VLs but not all elements are set/used. */ - struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* lock for pio_map */ spinlock_t pio_map_lock; + /* Send Context initialization lock. */ + spinlock_t sc_init_lock; + /* lock for sdma_map */ + spinlock_t sde_map_lock; /* array of kernel send contexts */ struct send_context **kernel_send_context; /* array of vl maps */ struct pio_vl_map __rcu *pio_map; - /* seqlock for sc2vl */ - seqlock_t sc2vl_lock; - u64 sc2vl[4]; - /* Send Context initialization lock. */ - spinlock_t sc_init_lock; + /* default flags to last descriptor */ + u64 default_desc1; /* fields common to all SDMA engines */ - /* default flags to last descriptor */ - u64 default_desc1; volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ dma_addr_t sdma_heads_phys; void *sdma_pad_dma; /* DMA'ed by chip */ @@ -862,8 +871,6 @@ struct hfi1_devdata { u32 chip_sdma_engines; /* num used */ u32 num_sdma; - /* lock for sdma_map */ - spinlock_t sde_map_lock; /* array of engines sized by num_sdma */ struct sdma_engine *per_sdma; /* array of vl maps */ @@ -872,14 +879,11 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; + u32 lcb_access_count; /* count of LCB users */ + /* common data between shared ASIC HFIs in this OS */ struct hfi1_asic_data *asic_data; - /* hfi1_pportdata, points to array of (physical) port-specific - * data structs, indexed by pidx (0..n-1) - */ - struct hfi1_pportdata *pport; - /* mem-mapped pointer to base of PIO buffers */ void __iomem *piobase; /* @@ -896,20 +900,13 @@ struct hfi1_devdata { /* send context numbers and sizes for each type */ struct sc_config_sizes sc_sizes[SC_MAX]; - u32 lcb_access_count; /* count of LCB users */ - char *boardname; /* human readable board info */ - /* device (not port) flags, basically device capabilities */ - u32 flags; - /* reset value */ u64 z_int_counter; u64 z_rcv_limit; u64 z_send_schedule; - /* percpu int_counter */ - u64 __percpu *int_counter; - u64 __percpu *rcv_limit; + u64 __percpu *send_schedule; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; @@ -924,6 +921,7 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; + u32 freezelen; /* max length of freezemsg */ u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ @@ -945,7 +943,6 @@ struct hfi1_devdata { * IB link status cheaply */ struct hfi1_status *status; - u32 freezelen; /* max length of freezemsg */ /* revision register shadow */ u64 revision; @@ -973,6 +970,8 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; + /* default link down value (poll/sleep) */ + u8 link_default; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1008,8 +1007,6 @@ struct hfi1_devdata { u8 hfi1_id; /* implementation code */ u8 icode; - /* default link down value (poll/sleep) */ - u8 link_default; /* vAU of this device */ u8 vau; /* vCU of this device */ @@ -1020,27 +1017,17 @@ struct hfi1_devdata { u16 vl15_init; /* Misc small ints */ - /* Number of physical ports available */ - u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; u8 n_krcv_queues; u8 qos_shift; - u8 qpn_mask; - u16 rhf_offset; /* offset of RHF within receive header entry */ u16 irev; /* implementation revision */ u16 dc8051_ver; /* 8051 firmware version */ + spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; struct platform_config_cache pcfg_cache; struct diag_client *diag_client; - spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ - - u8 psxmitwait_supported; - /* cycle length of PS* counters in HW (in picoseconds) */ - u16 psxmitwait_check_rate; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1055,6 +1042,9 @@ struct hfi1_devdata { struct rcv_array_data rcv_entries; + /* cycle length of PS* counters in HW (in picoseconds) */ + u16 psxmitwait_check_rate; + /* * 64 bit synthetic counters */ @@ -1085,11 +1075,11 @@ struct hfi1_devdata { struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; - u8 err_info_uncorrectable; - u8 err_info_fmconfig; atomic_t drop_packet; u8 do_drop; + u8 err_info_uncorrectable; + u8 err_info_fmconfig; /* * Software counters for the status bits defined by the @@ -1112,40 +1102,60 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; + /* receive interrupt function */ rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; + /* Save the enabled LCB error bits */ + u64 lcb_err_en; + /* * Capability to have different send engines simply by changing a * pointer value. */ - send_routine process_pio_send; + send_routine process_pio_send ____cacheline_aligned_in_smp; send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + /* hfi1_pportdata, points to array of (physical) port-specific + * data structs, indexed by pidx (0..n-1) + */ + struct hfi1_pportdata *pport; + /* receive context data */ + struct hfi1_ctxtdata **rcd; + u64 __percpu *int_counter; + /* device (not port) flags, basically device capabilities */ + u16 flags; + /* Number of physical ports available */ + u8 num_pports; + /* Lowest context number which can be used by user processes */ + u8 first_user_ctxt; + /* adding a new field here would make it part of this cacheline */ + + /* seqlock for sc2vl */ + seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; + u64 sc2vl[4]; + /* receive interrupt functions */ + rhf_rcv_function_ptr *rhf_rcv_function_map; + u64 __percpu *rcv_limit; + u16 rhf_offset; /* offset of RHF within receive header entry */ + /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ u8 oui1; u8 oui2; u8 oui3; + u8 dc_shutdown; + /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; - u32 rcv_ovfl_cnt; wait_queue_head_t event_queue; - /* Save the enabled LCB error bits */ - u64 lcb_err_en; - u8 dc_shutdown; - /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_dma; - bool eprom_available; /* true if EPROM is available for this device */ - bool aspm_supported; /* Does HW support ASPM */ - bool aspm_enabled; /* ASPM state: enabled/disabled */ + u32 rcv_ovfl_cnt; /* Serialize ASPM enable/disable between multiple verbs contexts */ spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ @@ -1155,8 +1165,11 @@ struct hfi1_devdata { /* Used to wait for outstanding user space clients before dev removal */ struct completion user_comp; - struct hfi1_affinity *affinity; + bool eprom_available; /* true if EPROM is available for this device */ + bool aspm_supported; /* Does HW support ASPM */ + bool aspm_enabled; /* ASPM state: enabled/disabled */ struct rhashtable sdma_rht; + struct kobject kobj; }; @@ -1604,6 +1617,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) } /* + * Return the indexed GUID from the port GUIDs table. + */ +static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + WARN_ON(index >= HFI1_GUIDS_PER_PORT); + return cpu_to_be64(ppd->guids[index]); +} + +/* * Called by readers of cc_state only, must call under rcu_read_lock(). */ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) @@ -1982,6 +2006,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) return i2c_target(dd->hfi1_id); } +/* Is this device integrated or discrete? */ +static inline bool is_integrated(struct hfi1_devdata *dd) +{ + return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..d9740ddea6f1 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -64,6 +64,7 @@ struct sdma_engine; /** * struct iowait - linkage for delayed progress/waiting * @list: used to add/insert into QP/PQ wait lists + * @lock: uses to record the list head lock * @tx_head: overflow list of sdma_txreq's * @sleep: no space callback * @wakeup: space callback wakeup @@ -91,6 +92,11 @@ struct sdma_engine; * so sleeping is not allowed. * * The wait_dma member along with the iow + * + * The lock field is used by waiters to record + * the seqlock_t that guards the list head. + * Waiters explicity know that, but the destroy + * code that unwaits QPs does not. */ struct iowait { @@ -103,6 +109,7 @@ struct iowait { unsigned seq); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); + seqlock_t *lock; struct work_struct iowork; wait_queue_head_t wait_dma; wait_queue_head_t wait_pio; @@ -141,6 +148,7 @@ static inline void iowait_init( void (*sdma_drained)(struct iowait *wait)) { wait->count = 0; + wait->lock = NULL; INIT_LIST_HEAD(&wait->list); INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 9487c9bb8920..6e595afca24c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -128,7 +128,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp = send_buf->mad; smp->base_version = OPA_MGMT_BASE_VERSION; smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - smp->class_version = OPA_SMI_CLASS_VERSION; + smp->class_version = OPA_SM_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; ibp->rvp.tid++; smp->tid = cpu_to_be64(ibp->rvp.tid); @@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, ni = (struct opa_node_info *)data; /* GUID 0 is illegal */ - if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) { + if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - ni->port_guid = cpu_to_be64(dd->pport[pidx].guid); + ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); ni->base_version = OPA_MGMT_BASE_VERSION; - ni->class_version = OPA_SMI_CLASS_VERSION; + ni->class_version = OPA_SM_CLASS_VERSION; ni->node_type = 1; /* channel adapter */ ni->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ ni->system_image_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - ni->node_guid = cpu_to_be64(dd->pport->guid); + ni->node_guid = ibdev->node_guid; ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); ni->device_id = cpu_to_be16(dd->pcidev->device); ni->revision = cpu_to_be32(dd->minrev); @@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, /* GUID 0 is illegal */ if (smp->attr_mod || pidx >= dd->num_pports || - dd->pport[pidx].guid == 0) + ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; - else - nip->port_guid = cpu_to_be64(dd->pport[pidx].guid); + return reply((struct ib_mad_hdr *)smp); + } + nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); nip->base_version = OPA_MGMT_BASE_VERSION; - nip->class_version = OPA_SMI_CLASS_VERSION; + nip->class_version = OPA_SM_CLASS_VERSION; nip->node_type = 1; /* channel adapter */ nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - nip->node_guid = cpu_to_be64(dd->pport->guid); + nip->node_guid = ibdev->node_guid; nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->pcidev->device); nip->revision = cpu_to_be32(dd->minrev); @@ -2302,7 +2303,7 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; p->base_version = OPA_MGMT_BASE_VERSION; - p->class_version = OPA_SMI_CLASS_VERSION; + p->class_version = OPA_SM_CLASS_VERSION; /* * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. */ @@ -4022,7 +4023,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; - if (smp->class_version != OPA_SMI_CLASS_VERSION) { + if (smp->class_version != OPA_SM_CLASS_VERSION) { smp->status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_mad_hdr *)smp); return ret; @@ -4232,7 +4233,7 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, *out_mad = *in_mad; - if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) { + if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) { pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; return reply((struct ib_mad_hdr *)pmp); } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 7ad30898fc19..ccbf52c8ff6f 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler, struct list_head *del_list); static void handle_remove(struct work_struct *work); -static struct mmu_notifier_ops mn_opts = { +static const struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, }; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d89b8745d4c1..615be68e40b3 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -758,6 +758,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->hw_context = hw_context; cr_group_addresses(sc, &dma); sc->credits = sci->credits; + sc->size = sc->credits * PIO_BLOCK_SIZE; /* PIO Send Memory Address details */ #define PIO_ADDR_CONTEXT_MASK 0xfful @@ -1242,6 +1243,7 @@ int sc_enable(struct send_context *sc) sc->free = 0; sc->alloc_free = 0; sc->fill = 0; + sc->fill_wrap = 0; sc->sr_head = 0; sc->sr_tail = 0; sc->flags = 0; @@ -1385,7 +1387,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, unsigned long flags; unsigned long avail; unsigned long blocks = dwords_to_blocks(dw_len); - unsigned long start_fill; + u32 fill_wrap; int trycount = 0; u32 head, next; @@ -1410,9 +1412,7 @@ retry: (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ - spin_unlock_irqrestore(&sc->alloc_lock, flags); sc_release_update(sc); - spin_lock_irqsave(&sc->alloc_lock, flags); sc->alloc_free = ACCESS_ONCE(sc->free); trycount++; goto retry; @@ -1428,8 +1428,11 @@ retry: head = sc->sr_head; /* "allocate" the buffer */ - start_fill = sc->fill; sc->fill += blocks; + fill_wrap = sc->fill_wrap; + sc->fill_wrap += blocks; + if (sc->fill_wrap >= sc->credits) + sc->fill_wrap = sc->fill_wrap - sc->credits; /* * Fill the parts that the releaser looks at before moving the head. @@ -1458,11 +1461,8 @@ retry: spin_unlock_irqrestore(&sc->alloc_lock, flags); /* finish filling in the buffer outside the lock */ - pbuf->start = sc->base_addr + ((start_fill % sc->credits) - * PIO_BLOCK_SIZE); - pbuf->size = sc->credits * PIO_BLOCK_SIZE; - pbuf->end = sc->base_addr + pbuf->size; - pbuf->block_count = blocks; + pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; + pbuf->end = sc->base_addr + sc->size; pbuf->qw_written = 0; pbuf->carry_bytes = 0; pbuf->carry.val64 = 0; @@ -1573,6 +1573,7 @@ static void sc_piobufavail(struct send_context *sc) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ qps[n++] = qp; } @@ -2028,29 +2029,17 @@ freesc15: int init_credit_return(struct hfi1_devdata *dd) { int ret; - int num_numa; int i; - num_numa = num_online_nodes(); - /* enforce the expectation that the numas are compact */ - for (i = 0; i < num_numa; i++) { - if (!node_online(i)) { - dd_dev_err(dd, "NUMA nodes are not compact\n"); - ret = -EINVAL; - goto done; - } - } - dd->cr_base = kcalloc( - num_numa, + node_affinity.num_possible_nodes, sizeof(struct credit_return_base), GFP_KERNEL); if (!dd->cr_base) { - dd_dev_err(dd, "Unable to allocate credit return base\n"); ret = -ENOMEM; goto done; } - for (i = 0; i < num_numa; i++) { + for_each_node_with_cpus(i) { int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); set_dev_node(&dd->pcidev->dev, i); @@ -2077,14 +2066,11 @@ done: void free_credit_return(struct hfi1_devdata *dd) { - int num_numa; int i; if (!dd->cr_base) return; - - num_numa = num_online_nodes(); - for (i = 0; i < num_numa; i++) { + for (i = 0; i < node_affinity.num_possible_nodes; i++) { if (dd->cr_base[i].va) { dma_free_coherent(&dd->pcidev->dev, TXE_NUM_CONTEXTS * diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index e709eaf743b5..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -83,53 +83,55 @@ struct pio_buf { void *arg; /* argument for cb */ void __iomem *start; /* buffer start address */ void __iomem *end; /* context end address */ - unsigned long size; /* context size, in bytes */ unsigned long sent_at; /* buffer is sent when <= free */ - u32 block_count; /* size of buffer, in blocks */ - u32 qw_written; /* QW written so far */ - u32 carry_bytes; /* number of valid bytes in carry */ union mix carry; /* pending unwritten bytes */ + u16 qw_written; /* QW written so far */ + u8 carry_bytes; /* number of valid bytes in carry */ }; /* cache line aligned pio buffer array */ union pio_shadow_ring { struct pio_buf pbuf; - u64 unused[16]; /* cache line spacer */ } ____cacheline_aligned; /* per-NUMA send context */ struct send_context { /* read-only after init */ struct hfi1_devdata *dd; /* device */ - void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + void __iomem *base_addr; /* start of PIO memory */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 size; /* context size, in bytes */ - volatile __le64 *hw_free; /* HW free counter */ - struct work_struct halt_work; /* halted context work queue entry */ - unsigned long flags; /* flags */ int node; /* context home node */ - int type; /* context type */ - u32 sw_index; /* software index number */ - u32 hw_context; /* hardware context number */ - u32 credits; /* number of blocks in context */ u32 sr_size; /* size of the shadow ring */ - u32 group; /* credit return group */ + u16 flags; /* flags */ + u8 type; /* context type */ + u8 sw_index; /* software index number */ + u8 hw_context; /* hardware context number */ + u8 group; /* credit return group */ + /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; + u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 sr_head; /* shadow ring head */ + u32 fill_wrap; /* tracks fill within ring */ + u32 credits; /* number of blocks in context */ + /* adding a new field here would make it part of this cacheline */ + /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; - unsigned long free; /* official free count */ u32 sr_tail; /* shadow ring tail */ + unsigned long free; /* official free count */ + volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; - u64 credit_ctrl; /* cache for credit control */ u32 credit_intr_count; /* count of credit intr users */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ + struct work_struct halt_work; /* halted context work queue entry */ }; /* send context flags */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index aa7773643107..03024cec78dd 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we've wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 202433178864..838fe84e285a 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -49,6 +49,90 @@ #include "efivar.h" #include "eprom.h" +static int validate_scratch_checksum(struct hfi1_devdata *dd) +{ + u64 checksum = 0, temp_scratch = 0; + int i, j, version; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; + + /* Prevent power on default of all zeroes from passing checksum */ + if (!version) + return 0; + + /* + * ASIC scratch 0 only contains the checksum and bitmap version as + * fields of interest, both of which are handled separately from the + * loop below, so skip it + */ + checksum += version; + for (i = 1; i < ASIC_NUM_SCRATCH; i++) { + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i)); + for (j = sizeof(u64); j != 0; j -= 2) { + checksum += (temp_scratch & 0xFFFF); + temp_scratch >>= 16; + } + } + + while (checksum >> 16) + checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16); + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + temp_scratch &= CHECKSUM_SMASK; + temp_scratch >>= CHECKSUM_SHIFT; + + if (checksum + temp_scratch == 0xFFFF) + return 1; + return 0; +} + +static void save_platform_config_fields(struct hfi1_devdata *dd) +{ + struct hfi1_pportdata *ppd = dd->pport; + u64 temp_scratch = 0, temp_dest = 0; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK : + PORT0_PORT_TYPE_SMASK); + ppd->port_type = temp_dest >> + (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT : + PORT0_PORT_TYPE_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK : + PORT0_LOCAL_ATTEN_SMASK); + ppd->local_atten = temp_dest >> + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT : + PORT0_LOCAL_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK : + PORT0_REMOTE_ATTEN_SMASK); + ppd->remote_atten = temp_dest >> + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT : + PORT0_REMOTE_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK : + PORT0_DEFAULT_ATTEN_SMASK); + ppd->default_atten = temp_dest >> + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT : + PORT0_DEFAULT_ATTEN_SHIFT); + + temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 : + ASIC_CFG_SCRATCH_2); + + ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT; + ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT; + ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT; + + ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> + QSFP_MAX_POWER_SHIFT; +} + void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; @@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd) u8 *temp_platform_config = NULL; u32 esize; - ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, - &esize); - if (!ret) { - /* success */ - size = esize; - goto success; + if (is_integrated(dd)) { + if (validate_scratch_checksum(dd)) { + save_platform_config_fields(dd); + return; + } + dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", + __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); + } else { + ret = eprom_read_platform_config(dd, + (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = esize; + return; + } + /* fail, try EFI variable */ + + ret = read_hfi1_efi_var(dd, "configuration", &size, + (void **)&temp_platform_config); + if (!ret) { + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = size; + return; + } } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) - goto success; - - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); + dd_dev_err(dd, + "%s: Failed to get platform config, falling back to sub-optimal default file\n", + __func__); /* fall back to request firmware */ platform_config_load = 1; - return; - -success: - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; } void free_platform_config(struct hfi1_devdata *dd) { if (!platform_config_load) { /* - * was loaded from EFI, release memory - * allocated by read_efi_var + * was loaded from EFI or the EPROM, release memory + * allocated by read_efi_var/eprom_read_platform_config */ kfree(dd->platform_config.data); } @@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd) void get_port_type(struct hfi1_pportdata *ppd) { int ret; + u32 temp; ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, + PORT_TABLE_PORT_TYPE, &temp, 4); - if (ret) + if (ret) { ppd->port_type = PORT_TYPE_UNKNOWN; + return; + } + ppd->port_type = temp; } int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) @@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, } } +/* + * Return a special SerDes setting for low power AOC cables. The power class + * threshold and setting being used were all found by empirical testing. + * + * Summary of the logic: + * + * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4) + * return 0xe + * return 0; // leave at default + */ +static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) +{ + u8 *cache = ppd->qsfp_info.cache; + int power_class; + + /* QSFP only */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; /* leave at default */ + + /* active optical cables only */ + switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { + case 0x0 ... 0x9: /* fallthrough */ + case 0xC: /* fallthrough */ + case 0xE: + /* active AOC */ + power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); + if (power_class < QSFP_POWER_CLASS_4) + return 0xe; + } + return 0; /* leave at default */ +} + static void apply_tunings( struct hfi1_pportdata *ppd, u32 tx_preset_index, u8 tuning_method, u32 total_atten, u8 limiting_active) @@ -606,7 +737,17 @@ static void apply_tunings( tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4); postcur = tx_preset; - config_data = precur | (attn << 8) | (postcur << 16); + /* + * NOTES: + * o The aoc_low_power_setting is applied to all lanes even + * though only lane 0's value is examined by the firmware. + * o A lingering low power setting after a cable swap does + * not occur. On cable unplug the 8051 is reset and + * restarted on cable insert. This resets all settings to + * their default, erasing any previous low power setting. + */ + config_data = precur | (attn << 8) | (postcur << 16) | + (aoc_low_power_setting(ppd) << 24); apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data, "Applying TX settings"); diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index e2c21613c326..eed0aa9124fa 100644 --- a/drivers/infiniband/hw/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -168,16 +168,6 @@ struct platform_config_cache { struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; }; -static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { - 0, - SYSTEM_TABLE_MAX, - PORT_TABLE_MAX, - RX_PRESET_TABLE_MAX, - TX_PRESET_TABLE_MAX, - QSFP_ATTEN_TABLE_MAX, - VARIABLE_SETTINGS_TABLE_MAX -}; - /* This section defines default values and encodings for the * fields defined for each table above */ @@ -295,6 +285,123 @@ enum link_tuning_encoding { OPA_UNKNOWN_TUNING }; +/* + * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch + * registers for integrated platforms + */ +#define PORT0_PORT_TYPE_SHIFT 0 +#define PORT0_LOCAL_ATTEN_SHIFT 4 +#define PORT0_REMOTE_ATTEN_SHIFT 10 +#define PORT0_DEFAULT_ATTEN_SHIFT 32 + +#define PORT1_PORT_TYPE_SHIFT 16 +#define PORT1_LOCAL_ATTEN_SHIFT 20 +#define PORT1_REMOTE_ATTEN_SHIFT 26 +#define PORT1_DEFAULT_ATTEN_SHIFT 40 + +#define PORT0_PORT_TYPE_MASK 0xFUL +#define PORT0_LOCAL_ATTEN_MASK 0x3FUL +#define PORT0_REMOTE_ATTEN_MASK 0x3FUL +#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT1_PORT_TYPE_MASK 0xFUL +#define PORT1_LOCAL_ATTEN_MASK 0x3FUL +#define PORT1_REMOTE_ATTEN_MASK 0x3FUL +#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \ + PORT0_PORT_TYPE_SHIFT) +#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \ + PORT0_LOCAL_ATTEN_SHIFT) +#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \ + PORT0_REMOTE_ATTEN_SHIFT) +#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \ + PORT0_DEFAULT_ATTEN_SHIFT) + +#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \ + PORT1_PORT_TYPE_SHIFT) +#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \ + PORT1_LOCAL_ATTEN_SHIFT) +#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \ + PORT1_REMOTE_ATTEN_SHIFT) +#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \ + PORT1_DEFAULT_ATTEN_SHIFT) + +#define QSFP_MAX_POWER_SHIFT 0 +#define TX_NO_EQ_SHIFT 4 +#define TX_EQ_SHIFT 25 +#define RX_SHIFT 46 + +#define QSFP_MAX_POWER_MASK 0xFUL +#define TX_NO_EQ_MASK 0x1FFFFFUL +#define TX_EQ_MASK 0x1FFFFFUL +#define RX_MASK 0xFFFFUL + +#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \ + QSFP_MAX_POWER_SHIFT) +#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT) +#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT) +#define RX_SMASK (RX_MASK << RX_SHIFT) + +#define TX_PRECUR_SHIFT 0 +#define TX_ATTN_SHIFT 4 +#define QSFP_TX_CDR_APPLY_SHIFT 9 +#define QSFP_TX_EQ_APPLY_SHIFT 10 +#define QSFP_TX_CDR_SHIFT 11 +#define QSFP_TX_EQ_SHIFT 12 +#define TX_POSTCUR_SHIFT 16 + +#define TX_PRECUR_MASK 0xFUL +#define TX_ATTN_MASK 0x1FUL +#define QSFP_TX_CDR_APPLY_MASK 0x1UL +#define QSFP_TX_EQ_APPLY_MASK 0x1UL +#define QSFP_TX_CDR_MASK 0x1UL +#define QSFP_TX_EQ_MASK 0xFUL +#define TX_POSTCUR_MASK 0x1FUL + +#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT) +#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT) +#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \ + QSFP_TX_CDR_APPLY_SHIFT) +#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \ + QSFP_TX_EQ_APPLY_SHIFT) +#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT) +#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT) +#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT) + +#define QSFP_RX_CDR_APPLY_SHIFT 0 +#define QSFP_RX_EMP_APPLY_SHIFT 1 +#define QSFP_RX_AMP_APPLY_SHIFT 2 +#define QSFP_RX_CDR_SHIFT 3 +#define QSFP_RX_EMP_SHIFT 4 +#define QSFP_RX_AMP_SHIFT 8 + +#define QSFP_RX_CDR_APPLY_MASK 0x1UL +#define QSFP_RX_EMP_APPLY_MASK 0x1UL +#define QSFP_RX_AMP_APPLY_MASK 0x1UL +#define QSFP_RX_CDR_MASK 0x1UL +#define QSFP_RX_EMP_MASK 0xFUL +#define QSFP_RX_AMP_MASK 0x3UL + +#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \ + QSFP_RX_CDR_APPLY_SHIFT) +#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \ + QSFP_RX_EMP_APPLY_SHIFT) +#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \ + QSFP_RX_AMP_APPLY_SHIFT) +#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT) +#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT) +#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT) + +#define BITMAP_VERSION 1 +#define BITMAP_VERSION_SHIFT 44 +#define BITMAP_VERSION_MASK 0xFUL +#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \ + BITMAP_VERSION_SHIFT) +#define CHECKSUM_SHIFT 48 +#define CHECKSUM_MASK 0xFFFFUL +#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT) + /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9fc75e7e8781..d752d6768a49 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp) static void flush_iowait(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; + seqlock_t *lock = priv->s_iowait.lock; - write_seqlock_irqsave(&dev->iowait_lock, flags); + if (!lock) + return; + write_seqlock_irqsave(lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(lock, flags); } static inline int opa_mtu_enum_to_int(int mtu) @@ -543,6 +546,7 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } @@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 83198a8a8797..809b26eb6d3c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, rvt_get_mr(ps->s_txreq->mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); @@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, bth2 = mask_psn(qp->s_ack_rdma_psn++); } else { /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); @@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; if (ps->s_txreq->mr) rvt_get_mr(ps->s_txreq->mr); @@ -335,7 +335,7 @@ normal: */ qp->s_ack_state = OP(SEND_ONLY); qp->s_flags &= ~RVT_S_ACK_PENDING; - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; if (qp->s_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) | @@ -351,7 +351,7 @@ normal: qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = ss; - qp->s_cur_size = len; + ps->s_txreq->ss = ss; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header( qp, ohdr, @@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct hfi1_ibport *ibp) { - struct ib_wc wc; - unsigned i; - lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a @@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -2295,7 +2262,7 @@ send_last: hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last); rvt_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) + if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -2410,8 +2377,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a1576aea4756..717ed4b159d3 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -239,16 +239,6 @@ bail: return ret; } -static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index) -{ - if (!index) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - - return cpu_to_be64(ppd->guid); - } - return ibp->guids[index - 1]; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = - grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ? - ibp->guids[grh->sgid_index - 1] : - cpu_to_be64(ppd_from_ibp(ibp)->guid); + grh->sgid_index < HFI1_GUIDS_PER_PORT ? + get_sguid(ibp, grh->sgid_index) : + get_sguid(ibp, HFI1_PORT_GUID_INDEX); hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ @@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth1; /* Construct the header. */ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; + extra_bytes = -ps->s_txreq->s_cur_size & 3; + nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, @@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 9cbe52d21077..1d81cac1fa6c 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde, sde->head_sn, tx->sn); sde->head_sn++; #endif - sdma_txclean(sde->dd, tx); + __sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); if (wait && iowait_sdma_dec(wait)) @@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) } /** - * sdma_txclean() - clean tx of mappings, descp *kmalloc's + * __sdma_txclean() - clean tx of mappings, descp *kmalloc's * @dd: hfi1_devdata for unmapping * @tx: tx request to clean * @@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) * The code can be called multiple times without issue. * */ -void sdma_txclean( +void __sdma_txclean( struct hfi1_devdata *dd, struct sdma_txreq *tx) { @@ -3065,7 +3065,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) tx->descp[i] = tx->descs[i]; return 0; enomem: - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOMEM; } @@ -3094,14 +3094,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } /* If coalesce buffer is allocated, copy data into it */ if (tx->coalesce_buf) { if (type == SDMA_MAP_NONE) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3109,7 +3109,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, kvaddr = kmap(page); kvaddr += offset; } else if (WARN_ON(!kvaddr)) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3139,7 +3139,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -3181,7 +3181,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } } diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 56257ea3598f..21f1e2834f37 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, int type, void *kvaddr, struct page *page, unsigned long offset, u16 len); int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); -void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); +void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); + +static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx) +{ + if (tx->num_desc) + __sdma_txclean(dd, tx); +} /* helpers used by public routines */ static inline void _sdma_close_tx(struct hfi1_devdata *dd, @@ -753,7 +759,7 @@ static inline int sdma_txadd_page( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5e6d1bac4914..b141a78ae38b 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; + ps->s_txreq->ss = &qp->s_sge; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); /* pbc */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 97ae24b6314c..c071955c0272 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; + ps->s_txreq->s_cur_size = wqe->length; + ps->s_txreq->ss = &qp->s_sge; qp->s_srate = ah_attr->static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 77697d690f3e..7d22f8ee98ef 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_MASK 0xff #define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_OM_LARGE 64 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) -/* Last packet in the request */ -#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 @@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->busycount = 0; INIT_LIST_HEAD(&tx->list); + /* + * For the last packet set the ACK request + * and disable header suppression. + */ if (req->seqnum == req->info.npkts - 1) - tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; + tx->flags |= (TXREQ_FLAGS_REQ_ACK | + TXREQ_FLAGS_REQ_DISABLE_SH); /* * Calculate the payload size - this is min of the fragment @@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } datalen = compute_data_length(req, tx); + + /* + * Disable header suppression for the payload <= 8DWS. + * If there is an uncorrectable error in the receive + * data FIFO when the received payload size is less than + * or equal to 8DWS then the RxDmaDataFifoRdUncErr is + * not reported.There is set RHF.EccErr if the header + * is not suppressed. + */ if (!datalen) { SDMA_DBG(req, "Request has data but pkt len is 0"); ret = -EFAULT; goto free_tx; + } else if (datalen <= 32) { + tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; } } @@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) LRH2PBC(lrhlen); tx->hdr.pbc[0] = cpu_to_le16(pbclen); } + ret = check_header_template(req, &tx->hdr, + lrhlen, datalen); + if (ret) + goto free_tx; ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, sizeof(tx->hdr) + datalen, @@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req, req->seqnum)); /* Set ACK request on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) hdr->bth[2] |= cpu_to_be32(1UL << 31); /* Set the new offset */ @@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req, /* Set KDETH.TID based on value for this TID */ KDETH_SET(hdr->kdeth.ver_tid_offset, TID, EXP_TID_GET(tidval, IDX)); - /* Clear KDETH.SH only on the last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + /* Clear KDETH.SH when DISABLE_SH flag is set */ + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of @@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* BTH.PSN and BTH.A */ val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); @@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | ((req->tidoffset / req->omfactor) & 0x7fff))); - /* KDETH.TIDCtrl, KDETH.TID */ + /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | - (EXP_TID_GET(tidval, IDX) & 0x3ff)); - /* Clear KDETH.SH on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { - val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) << - AHG_KDETH_INTR_SHIFT); - val &= cpu_to_le16(~(1U << 13)); - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + (EXP_TID_GET(tidval, IDX) & 0x3ff)); + + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { + val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } else { - AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val); + val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? + cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : + cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } + + AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4b7a16ceb362..95ed4d6da510 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -297,22 +297,6 @@ static inline int wss_exceeds_threshold(void) } /* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, - [IB_WR_SEND_WITH_INV] = IB_WC_SEND, - [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, - [IB_WR_REG_MR] = IB_WC_REG_MR -}; - -/* * Length of header by opcode, 0 --> not supported */ const u8 hdr_len_by_opcode[256] = { @@ -694,6 +678,7 @@ static void mem_timer(unsigned long data) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); rvt_get_qp(qp); } @@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev, */ static noinline int build_verbs_ulp_payload( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx) { + struct rvt_sge_state *ss = tx->ss; struct rvt_sge *sg_list = ss->sg_list; struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; @@ -835,7 +821,6 @@ bail_txadd: /* New API */ static int build_verbs_tx_desc( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, struct hfi1_ahg_info *ahg_info, @@ -879,9 +864,9 @@ static int build_verbs_tx_desc( goto bail_txadd; } - /* add the ulp payload - if any. ss can be NULL for acks */ - if (ss) - ret = build_verbs_ulp_payload(sde, ss, length, tx); + /* add the ulp payload - if any. tx->ss can be NULL for acks */ + if (tx->ss) + ret = build_verbs_ulp_payload(sde, length, tx); bail_txadd: return ret; } @@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + u32 len = ps->s_txreq->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; @@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); + ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp, qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ @@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, { struct hfi1_qp_priv *priv = qp->priv; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + struct rvt_sge_state *ss = ps->s_txreq->ss; + u32 len = ps->s_txreq->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; @@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, u8 op = get_opcode(h); if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && + tx->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) @@ -1483,15 +1468,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid) { struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - if (guid_index == 0) - *guid = cpu_to_be64(ppd->guid); - else if (guid_index < HFI1_GUIDS_PER_PORT) - *guid = ibp->guids[guid_index - 1]; - else + if (guid_index >= HFI1_GUIDS_PER_PORT) return -EINVAL; + *guid = get_sguid(ibp, guid_index); return 0; } @@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, dc8051_ver_min(ver)); } +static const char * const driver_cntr_names[] = { + /* must be element 0*/ + "DRIVER_KernIntr", + "DRIVER_ErrorIntr", + "DRIVER_Tx_Errs", + "DRIVER_Rcv_Errs", + "DRIVER_HW_Errs", + "DRIVER_NoPIOBufs", + "DRIVER_CtxtsOpen", + "DRIVER_RcvLen_Errs", + "DRIVER_EgrBufFull", + "DRIVER_EgrHdrFull" +}; + +static const char **dev_cntr_names; +static const char **port_cntr_names; +static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +static int num_dev_cntrs; +static int num_port_cntrs; +static int cntr_names_initialized; + +/* + * Convert a list of names separated by '\n' into an array of NULL terminated + * strings. Optionally some entries can be reserved in the array to hold extra + * external strings. + */ +static int init_cntr_names(const char *names_in, + const int names_len, + int num_extra_names, + int *num_cntrs, + const char ***cntr_names) +{ + char *names_out, *p, **q; + int i, n; + + n = 0; + for (i = 0; i < names_len; i++) + if (names_in[i] == '\n') + n++; + + names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, + GFP_KERNEL); + if (!names_out) { + *num_cntrs = 0; + *cntr_names = NULL; + return -ENOMEM; + } + + p = names_out + (n + num_extra_names) * sizeof(char *); + memcpy(p, names_in, names_len); + + q = (char **)names_out; + for (i = 0; i < n; i++) { + q[i] = p; + p = strchr(p, '\n'); + *p++ = '\0'; + } + + *num_cntrs = n; + *cntr_names = (const char **)names_out; + return 0; +} + +static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + int i, err; + + if (!cntr_names_initialized) { + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + + err = init_cntr_names(dd->cntrnames, + dd->cntrnameslen, + num_driver_cntrs, + &num_dev_cntrs, + &dev_cntr_names); + if (err) + return NULL; + + for (i = 0; i < num_driver_cntrs; i++) + dev_cntr_names[num_dev_cntrs + i] = + driver_cntr_names[i]; + + err = init_cntr_names(dd->portcntrnames, + dd->portcntrnameslen, + 0, + &num_port_cntrs, + &port_cntr_names); + if (err) { + kfree(dev_cntr_names); + dev_cntr_names = NULL; + return NULL; + } + cntr_names_initialized = 1; + } + + if (!port_num) + return rdma_alloc_hw_stats_struct( + dev_cntr_names, + num_dev_cntrs + num_driver_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + else + return rdma_alloc_hw_stats_struct( + port_cntr_names, + num_port_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static u64 hfi1_sps_ints(void) +{ + unsigned long flags; + struct hfi1_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&hfi1_devs_lock, flags); + list_for_each_entry(dd, &hfi1_dev_list, list) { + sps_ints += get_all_cpu_total(dd->int_counter); + } + spin_unlock_irqrestore(&hfi1_devs_lock, flags); + return sps_ints; +} + +static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) +{ + u64 *values; + int count; + + if (!port) { + u64 *stats = (u64 *)&hfi1_stats; + int i; + + hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); + values[num_dev_cntrs] = hfi1_sps_ints(); + for (i = 1; i < num_driver_cntrs; i++) + values[num_dev_cntrs + i] = stats[i]; + count = num_dev_cntrs + num_driver_cntrs; + } else { + struct hfi1_ibport *ibp = to_iport(ibdev, port); + + hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); + count = num_port_cntrs; + } + + memcpy(stats->value, values, count * sizeof(u64)); + return count; +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) struct hfi1_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; + struct hfi1_ibport *ibp = &ppd->ibport_data; unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; @@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); seqlock_init(&dev->iowait_lock); + seqlock_init(&dev->txwait_lock); INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); @@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) if (ret) goto err_verbs_txreq; + /* Use first-port GUID as node guid */ + ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); + /* * The system image GUID is supposed to be the same for all * HFIs in a single system but since there can be other * device types in the system, we can't be sure this is unique. */ if (!ib_hfi1_sys_image_guid) - ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid); + ib_hfi1_sys_image_guid = ibdev->node_guid; lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); ibdev->owner = THIS_MODULE; - ibdev->node_guid = cpu_to_be64(ppd->guid); ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; + ibdev->alloc_hw_stats = alloc_hw_stats; + ibdev->get_hw_stats = get_hw_stats; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + + kfree(dev_cntr_names); + kfree(port_cntr_names); + cntr_names_initialized = 0; } void hfi1_cnp_rcv(struct hfi1_packet *packet) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 1c3815d89eb7..e6b893010e6d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -73,7 +73,6 @@ struct hfi1_packet; #include "iowait.h" #define HFI1_MAX_RDMA_ATOMIC 16 -#define HFI1_GUIDS_PER_PORT 5 /* * Increment this value if any changes that break userspace ABI @@ -169,8 +168,6 @@ struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; struct rvt_ibport rvp; - __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - /* the first 16 entries are sl_to_vl for !OPA */ u8 sl_to_sc[32]; u8 sc_to_sl[32]; @@ -180,18 +177,19 @@ struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ /* QP numbers are shared by all IB ports */ - /* protect wait lists */ - seqlock_t iowait_lock; + /* protect txwait list */ + seqlock_t txwait_lock ____cacheline_aligned_in_smp; struct list_head txwait; /* list for wait verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ - struct list_head txreq_free; struct kmem_cache *verbs_txreq_cache; - struct timer_list mem_timer; + u64 n_txwait; + u64 n_kmem_wait; + /* protect iowait lists */ + seqlock_t iowait_lock ____cacheline_aligned_in_smp; u64 n_piowait; u64 n_piodrain; - u64 n_txwait; - u64 n_kmem_wait; + struct timer_list mem_timer; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 094ab829ec42..5d23172c470f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx) kmem_cache_free(dev->verbs_txreq_cache, tx); do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&dev->txwait_lock); if (!list_empty(&dev->txwait)) { struct iowait *wait; - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&dev->txwait_lock, flags); wait = list_first_entry(&dev->txwait, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&dev->txwait_lock, flags); hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&dev->txwait_lock, seq)); } struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, @@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - write_seqlock(&dev->iowait_lock); + write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); + priv->s_iowait.lock = &dev->txwait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } out: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&dev->txwait_lock); return tx; } diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 5660897593ba..76216f2ef35a 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -65,6 +65,7 @@ struct verbs_txreq { struct sdma_engine *sde; struct send_context *psc; u16 hdr_dwords; + u16 s_cur_size; }; struct hfi1_ibdev; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 24f79ee39fdf..0ac294db3b29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,7 +39,8 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); struct device *dev = &hr_dev->pdev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 863a17a2de40..605962f2828c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -61,9 +61,10 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) return ret; } -void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj) +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, + int rr) { - hns_roce_bitmap_free_range(bitmap, obj, 1); + hns_roce_bitmap_free_range(bitmap, obj, 1, rr); } int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, @@ -106,7 +107,8 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, - unsigned long obj, int cnt) + unsigned long obj, int cnt, + int rr) { int i; @@ -116,7 +118,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, for (i = 0; i < cnt; i++) clear_bit(obj + i, bitmap->table); - bitmap->last = min(bitmap->last, obj); + if (!rr) + bitmap->last = min(bitmap->last, obj); bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) & bitmap->mask; spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 2a0b6c05da5f..8c1f7a6f84d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -216,10 +216,10 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, goto out; /* - * It is timeout when wait_for_completion_timeout return 0 - * The return value is the time limit set in advance - * how many seconds showing - */ + * It is timeout when wait_for_completion_timeout return 0 + * The return value is the time limit set in advance + * how many seconds showing + */ if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index e3997d312c55..f5a9ee2fc53d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_CMD_H #define HNS_ROCE_MAILBOX_SIZE 4096 +#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000 enum { /* TPT commands */ @@ -57,17 +58,6 @@ enum { HNS_ROCE_CMD_QUERY_QP = 0x22, }; -enum { - HNS_ROCE_CMD_TIME_CLASS_A = 10000, - HNS_ROCE_CMD_TIME_CLASS_B = 10000, - HNS_ROCE_CMD_TIME_CLASS_C = 10000, -}; - -struct hns_roce_cmd_mailbox { - void *buf; - dma_addr_t dma; -}; - int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 297016103aa7..4af403e1348c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -57,6 +57,32 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +/* + * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon + * SOC, check if a is less than b. + * @a: hardware index value + * @b: hardware index value + * @bits: the number of bits of a and b, range: 0~31. + * + * Hardware index increases continuously till max value, and then restart + * from zero, again and again. Because the bits of reg field is often + * limited, the reg field can only hold the low bits of the hardware index + * in hisilicon SOC. + * In some scenes we need to compare two values(a,b) getted from two reg + * fields in this driver, for example: + * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has + * incresed from 0xffff to 0x1 and a is less than b. + * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a + * is bigger than b. + * + * Return true on a less than b, otherwise false. + */ +#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1) +#define roce_hw_index_shift(bits) (32 - (bits)) +#define roce_hw_index_cmp_lt(a, b, bits) \ + ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \ + roce_hw_index_shift(bits)) < 0) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -245,16 +271,26 @@ #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +#define ROCEE_SDB_PTR_CMP_BITS 28 + #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) +#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0 +#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \ + (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S) + +#define ROCEE_SDB_CNT_CMP_BITS 16 + +#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20 + +#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0 + /*************ROCEE_REG DEFINITION****************/ #define ROCEE_VENDOR_ID_REG 0x0 #define ROCEE_VENDOR_PART_ID_REG 0x4 -#define ROCEE_HW_VERSION_REG 0x8 - #define ROCEE_SYS_IMAGE_GUID_L_REG 0xC #define ROCEE_SYS_IMAGE_GUID_H_REG 0x10 @@ -318,7 +354,11 @@ #define ROCEE_SDB_ISSUE_PTR_REG 0x758 #define ROCEE_SDB_SEND_PTR_REG 0x75C +#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850 +#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0 #define ROCEE_SDB_INV_CNT_REG 0x9A4 +#define ROCEE_SDB_RETRY_CNT_REG 0x9AC +#define ROCEE_TSP_BP_ST_REG 0x9EC #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 097365932b09..589496c8fb9e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -35,7 +35,7 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #include "hns_roce_common.h" static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) @@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, unsigned long cq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, @@ -166,7 +166,7 @@ err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -176,11 +176,10 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); } -static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = &hr_dev->pdev->dev; @@ -204,7 +203,7 @@ static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, spin_unlock_irq(&cq_table->lock); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, @@ -349,6 +348,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, goto err_mtt; } + /* + * For the QP created by kernel space, tptr value should be initialized + * to zero; For the QP created by user space, it will cause synchronous + * problems if tptr is set to zero here, so we initialze it in user + * space. + */ + if (!context) + *hr_cq->tptr_addr = 0; + /* Get created cq handler and carry out event */ hr_cq->comp = hns_roce_ib_cq_comp; hr_cq->event = hns_roce_ib_cq_event; @@ -383,19 +391,25 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + int ret = 0; - hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + if (hr_dev->hw->destroy_cq) { + ret = hr_dev->hw->destroy_cq(ib_cq); + } else { + hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ib_cq->uobject) - ib_umem_release(hr_cq->umem); - else - /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + if (ib_cq->uobject) + ib_umem_release(hr_cq->umem); + else + /* Free the buff of stored cq */ + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, + ib_cq->cqe); - kfree(hr_cq); + kfree(hr_cq); + } - return 0; + return ret; } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 341731553a60..1a6cb5d7a0dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,6 +37,8 @@ #define DRV_NAME "hns_roce" +#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') + #define MAC_ADDR_OCTET_NUM 6 #define HNS_ROCE_MAX_MSG_LEN 0x80000000 @@ -54,6 +56,12 @@ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 +#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 +#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ + (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS) +#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 +#define HNS_ROCE_MIN_CQE_CNT 16 + #define HNS_ROCE_MAX_IRQ_NUM 34 #define HNS_ROCE_COMP_VEC_NUM 32 @@ -70,6 +78,9 @@ #define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 +#define BITMAP_NO_RR 0 +#define BITMAP_RR 1 + #define MR_TYPE_MR 0x00 #define MR_TYPE_DMA 0x03 @@ -196,9 +207,9 @@ struct hns_roce_bitmap { /* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ /* Every bit repesent to a partner free/used status in bitmap */ /* -* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 -* Bit = 1 represent to idle and available; bit = 0: not available -*/ + * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 + * Bit = 1 represent to idle and available; bit = 0: not available + */ struct hns_roce_buddy { /* Members point to every order level bitmap */ unsigned long **bits; @@ -296,7 +307,7 @@ struct hns_roce_cq { u32 cq_depth; u32 cons_index; void __iomem *cq_db_l; - void __iomem *tptr_addr; + u16 *tptr_addr; unsigned long cqn; u32 vector; atomic_t refcount; @@ -360,29 +371,34 @@ struct hns_roce_cmdq { struct mutex hcr_mutex; struct semaphore poll_sem; /* - * Event mode: cmd register mutex protection, - * ensure to not exceed max_cmds and user use limit region - */ + * Event mode: cmd register mutex protection, + * ensure to not exceed max_cmds and user use limit region + */ struct semaphore event_sem; int max_cmds; spinlock_t context_lock; int free_head; struct hns_roce_cmd_context *context; /* - * Result of get integer part - * which max_comds compute according a power of 2 - */ + * Result of get integer part + * which max_comds compute according a power of 2 + */ u16 token_mask; /* - * Process whether use event mode, init default non-zero - * After the event queue of cmd event ready, - * can switch into event mode - * close device, switch into poll mode(non event mode) - */ + * Process whether use event mode, init default non-zero + * After the event queue of cmd event ready, + * can switch into event mode + * close device, switch into poll mode(non event mode) + */ u8 use_events; u8 toggle; }; +struct hns_roce_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + struct hns_roce_dev; struct hns_roce_qp { @@ -424,8 +440,6 @@ struct hns_roce_ib_iboe { struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; struct notifier_block nb; struct notifier_block nb_inet; - /* 16 GID is shared by 6 port in v1 engine. */ - union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM]; u8 phy_port[HNS_ROCE_MAX_PORTS]; }; @@ -519,6 +533,8 @@ struct hns_roce_hw { struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); + int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); + int (*destroy_cq)(struct ib_cq *ibcq); void *priv; }; @@ -553,6 +569,8 @@ struct hns_roce_dev { int cmd_mod; int loop_idc; + dma_addr_t tptr_dma_addr; /*only for hw v1*/ + u32 tptr_size; /*only for hw v1*/ struct hns_roce_hw *hw; }; @@ -657,7 +675,8 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); -void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj); +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, + int rr); int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, u32 reserved_bot, u32 resetrved_top); void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap); @@ -665,9 +684,11 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, int align, unsigned long *obj); void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, - unsigned long obj, int cnt); + unsigned long obj, int cnt, + int rr); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah); @@ -681,6 +702,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int hns_roce_dereg_mr(struct ib_mr *ibmr); +int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); +unsigned long key_to_hw_index(u32 key); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, struct hns_roce_buf *buf); @@ -717,6 +742,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct ib_udata *udata); int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); +void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 21e21b03cfb5..50f864935a0e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -371,9 +371,9 @@ static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev, int i = 0; /** - * AEQ overflow ECC mult bit err CEQ overflow alarm - * must clear interrupt, mask irq, clear irq, cancel mask operation - */ + * AEQ overflow ECC mult bit err CEQ overflow alarm + * must clear interrupt, mask irq, clear irq, cancel mask operation + */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); if (roce_get_bit(aeshift_val, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 250d8f280390..c5104e0b2916 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -80,9 +80,9 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, --order; /* - * Alloc memory one time. If failed, don't alloc small block - * memory, directly return fail. - */ + * Alloc memory one time. If failed, don't alloc small block + * memory, directly return fail. + */ mem = &chunk->mem[chunk->npages]; buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order, &sg_dma_address(mem), gfp_mask); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 71232e5fabf6..b8111b0c8877 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -32,6 +32,7 @@ #include <linux/platform_device.h> #include <linux/acpi.h> +#include <linux/etherdevice.h> #include <rdma/ib_umem.h> #include "hns_roce_common.h" #include "hns_roce_device.h" @@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq = 0; u32 ind = 0; int ret = 0; + u8 *smac; + int loopback; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, UD_SEND_WQE_U32_8_DMAC_5_M, UD_SEND_WQE_U32_8_DMAC_5_S, ah->av.mac[5]); + + smac = (u8 *)hr_dev->dev_addr[qp->port]; + loopback = ether_addr_equal_unaligned(ah->av.mac, + smac) ? 1 : 0; + roce_set_bit(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S, + loopback); + roce_set_field(ud_sq_wqe->u32_8, UD_SEND_WQE_U32_8_OPERATION_TYPE_M, UD_SEND_WQE_U32_8_OPERATION_TYPE_S, @@ -284,6 +295,8 @@ out: roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M, + SQ_DOORBELL_U32_4_SL_S, qp->sl); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M, SQ_DOORBELL_U32_4_PORT_S, qp->phy_port); roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M, @@ -611,6 +624,213 @@ ext_sdb_buf_fail_out: return ret; } +static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev, + struct ib_pd *pd) +{ + struct device *dev = &hr_dev->pdev->dev; + struct ib_qp_init_attr init_attr; + struct ib_qp *qp; + + memset(&init_attr, 0, sizeof(struct ib_qp_init_attr)); + init_attr.qp_type = IB_QPT_RC; + init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM; + init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM; + + qp = hns_roce_create_qp(pd, &init_attr, NULL); + if (IS_ERR(qp)) { + dev_err(dev, "Create loop qp for mr free failed!"); + return NULL; + } + + return to_hr_qp(qp); +} + +static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_caps *caps = &hr_dev->caps; + struct device *dev = &hr_dev->pdev->dev; + struct ib_cq_init_attr cq_init_attr; + struct hns_roce_free_mr *free_mr; + struct ib_qp_attr attr = { 0 }; + struct hns_roce_v1_priv *priv; + struct hns_roce_qp *hr_qp; + struct ib_cq *cq; + struct ib_pd *pd; + u64 subnet_prefix; + int attr_mask = 0; + int i; + int ret; + u8 phy_port; + u8 sl; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + /* Reserved cq for loop qp */ + cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; + cq_init_attr.comp_vector = 0; + cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); + if (IS_ERR(cq)) { + dev_err(dev, "Create cq for reseved loop qp failed!"); + return -ENOMEM; + } + free_mr->mr_free_cq = to_hr_cq(cq); + free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev; + free_mr->mr_free_cq->ib_cq.uobject = NULL; + free_mr->mr_free_cq->ib_cq.comp_handler = NULL; + free_mr->mr_free_cq->ib_cq.event_handler = NULL; + free_mr->mr_free_cq->ib_cq.cq_context = NULL; + atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); + + pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); + if (IS_ERR(pd)) { + dev_err(dev, "Create pd for reseved loop qp failed!"); + ret = -ENOMEM; + goto alloc_pd_failed; + } + free_mr->mr_free_pd = to_hr_pd(pd); + free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; + free_mr->mr_free_pd->ibpd.uobject = NULL; + atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); + + attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; + attr.pkey_index = 0; + attr.min_rnr_timer = 0; + /* Disable read ability */ + attr.max_dest_rd_atomic = 0; + attr.max_rd_atomic = 0; + /* Use arbitrary values as rq_psn and sq_psn */ + attr.rq_psn = 0x0808; + attr.sq_psn = 0x0808; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.timeout = 0x12; + attr.path_mtu = IB_MTU_256; + attr.ah_attr.ah_flags = 1; + attr.ah_attr.static_rate = 3; + attr.ah_attr.grh.sgid_index = 0; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.flow_label = 0; + attr.ah_attr.grh.traffic_class = 0; + + subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); + if (IS_ERR(free_mr->mr_free_qp[i])) { + dev_err(dev, "Create loop qp failed!\n"); + goto create_lp_qp_failed; + } + hr_qp = free_mr->mr_free_qp[i]; + + sl = i / caps->num_ports; + + if (caps->num_ports == HNS_ROCE_MAX_PORTS) + phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : + (i % caps->num_ports); + else + phy_port = i % caps->num_ports; + + hr_qp->port = phy_port + 1; + hr_qp->phy_port = phy_port; + hr_qp->ibqp.qp_type = IB_QPT_RC; + hr_qp->ibqp.device = &hr_dev->ib_dev; + hr_qp->ibqp.uobject = NULL; + atomic_set(&hr_qp->ibqp.usecnt, 0); + hr_qp->ibqp.pd = pd; + hr_qp->ibqp.recv_cq = cq; + hr_qp->ibqp.send_cq = cq; + + attr.ah_attr.port_num = phy_port + 1; + attr.ah_attr.sl = sl; + attr.port_num = phy_port + 1; + + attr.dest_qp_num = hr_qp->qpn; + memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port], + MAC_ADDR_OCTET_NUM); + + memcpy(attr.ah_attr.grh.dgid.raw, + &subnet_prefix, sizeof(u64)); + memcpy(&attr.ah_attr.grh.dgid.raw[8], + hr_dev->dev_addr[phy_port], 3); + memcpy(&attr.ah_attr.grh.dgid.raw[13], + hr_dev->dev_addr[phy_port] + 3, 3); + attr.ah_attr.grh.dgid.raw[11] = 0xff; + attr.ah_attr.grh.dgid.raw[12] = 0xfe; + attr.ah_attr.grh.dgid.raw[8] ^= 2; + + attr_mask |= IB_QP_PORT; + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_RESET, IB_QPS_INIT); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_INIT, IB_QPS_RTR); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, + IB_QPS_RTR, IB_QPS_RTS); + if (ret) { + dev_err(dev, "modify qp failed(%d)!\n", ret); + goto create_lp_qp_failed; + } + } + + return 0; + +create_lp_qp_failed: + for (i -= 1; i >= 0; i--) { + hr_qp = free_mr->mr_free_qp[i]; + if (hns_roce_v1_destroy_qp(&hr_qp->ibqp)) + dev_err(dev, "Destroy qp %d for mr free failed!\n", i); + } + + if (hns_roce_dealloc_pd(pd)) + dev_err(dev, "Destroy pd for create_lp_qp failed!\n"); + +alloc_pd_failed: + if (hns_roce_ib_destroy_cq(cq)) + dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); + + return -EINVAL; +} + +static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct hns_roce_qp *hr_qp; + int ret; + int i; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + hr_qp = free_mr->mr_free_qp[i]; + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); + if (ret) + dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", + i, ret); + } + + ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq); + if (ret) + dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); + + ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); + if (ret) + dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret); +} + static int hns_roce_db_init(struct hns_roce_dev *hr_dev) { struct device *dev = &hr_dev->pdev->dev; @@ -648,6 +868,223 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) return 0; } +void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) +{ + struct hns_roce_recreate_lp_qp_work *lp_qp_work; + struct hns_roce_dev *hr_dev; + + lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work, + work); + hr_dev = to_hr_dev(lp_qp_work->ib_dev); + + hns_roce_v1_release_lp_qp(hr_dev); + + if (hns_roce_v1_rsv_lp_qp(hr_dev)) + dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n"); + + if (lp_qp_work->comp_flag) + complete(lp_qp_work->comp); + + kfree(lp_qp_work); +} + +static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_recreate_lp_qp_work *lp_qp_work; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct completion comp; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), + GFP_KERNEL); + + INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn); + + lp_qp_work->ib_dev = &(hr_dev->ib_dev); + lp_qp_work->comp = ∁ + lp_qp_work->comp_flag = 1; + + init_completion(lp_qp_work->comp); + + queue_work(free_mr->free_mr_wq, &(lp_qp_work->work)); + + while (time_before_eq(jiffies, end)) { + if (try_wait_for_completion(&comp)) + return 0; + msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE); + } + + lp_qp_work->comp_flag = 0; + if (try_wait_for_completion(&comp)) + return 0; + + dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n"); + return -ETIMEDOUT; +} + +static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct device *dev = &hr_dev->pdev->dev; + struct ib_send_wr send_wr, *bad_wr; + int ret; + + memset(&send_wr, 0, sizeof(send_wr)); + send_wr.next = NULL; + send_wr.num_sge = 0; + send_wr.send_flags = 0; + send_wr.sg_list = NULL; + send_wr.wr_id = (unsigned long long)&send_wr; + send_wr.opcode = IB_WR_RDMA_WRITE; + + ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr); + if (ret) { + dev_err(dev, "Post write wqe for mr free failed(%d)!", ret); + return ret; + } + + return 0; +} + +static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) +{ + struct hns_roce_mr_free_work *mr_work; + struct ib_wc wc[HNS_ROCE_V1_RESV_QP]; + struct hns_roce_free_mr *free_mr; + struct hns_roce_cq *mr_free_cq; + struct hns_roce_v1_priv *priv; + struct hns_roce_dev *hr_dev; + struct hns_roce_mr *hr_mr; + struct hns_roce_qp *hr_qp; + struct device *dev; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; + int i; + int ret; + int ne; + + mr_work = container_of(work, struct hns_roce_mr_free_work, work); + hr_mr = (struct hns_roce_mr *)mr_work->mr; + hr_dev = to_hr_dev(mr_work->ib_dev); + dev = &hr_dev->pdev->dev; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + mr_free_cq = free_mr->mr_free_cq; + + for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { + hr_qp = free_mr->mr_free_qp[i]; + ret = hns_roce_v1_send_lp_wqe(hr_qp); + if (ret) { + dev_err(dev, + "Send wqe (qp:0x%lx) for mr free failed(%d)!\n", + hr_qp->qpn, ret); + goto free_work; + } + } + + ne = HNS_ROCE_V1_RESV_QP; + do { + ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); + if (ret < 0) { + dev_err(dev, + "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", + hr_qp->qpn, ret, hr_mr->key, ne); + goto free_work; + } + ne -= ret; + msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + } while (ne && time_before_eq(jiffies, end)); + + if (ne != 0) + dev_err(dev, + "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n", + hr_mr->key, ne); + +free_work: + if (mr_work->comp_flag) + complete(mr_work->comp); + kfree(mr_work); +} + +int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_mr_free_work *mr_work; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + struct completion comp; + unsigned long end = + msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; + unsigned long start = jiffies; + int npages; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + if (mr->enabled) { + if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) + & (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "HW2SW_MPT failed!\n"); + } + + mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); + if (!mr_work) { + ret = -ENOMEM; + goto free_mr; + } + + INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn); + + mr_work->ib_dev = &(hr_dev->ib_dev); + mr_work->comp = ∁ + mr_work->comp_flag = 1; + mr_work->mr = (void *)mr; + init_completion(mr_work->comp); + + queue_work(free_mr->free_mr_wq, &(mr_work->work)); + + while (time_before_eq(jiffies, end)) { + if (try_wait_for_completion(&comp)) + goto free_mr; + msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + } + + mr_work->comp_flag = 0; + if (try_wait_for_completion(&comp)) + goto free_mr; + + dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key); + ret = -ETIMEDOUT; + +free_mr: + dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n", + mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start)); + + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + dma_free_coherent(dev, npages * 8, mr->pbl_buf, + mr->pbl_dma_addr); + } + + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, + key_to_hw_index(mr->key), 0); + + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return ret; +} + static void hns_roce_db_free(struct hns_roce_dev *hr_dev) { struct device *dev = &hr_dev->pdev->dev; @@ -849,6 +1286,85 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); } +static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; + + /* + * This buffer will be used for CQ's tptr(tail pointer), also + * named ci(customer index). Every CQ will use 2 bytes to save + * cqe ci in hip06. Hardware will read this area to get new ci + * when the queue is almost full. + */ + tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, + &tptr_buf->map, GFP_KERNEL); + if (!tptr_buf->buf) + return -ENOMEM; + + hr_dev->tptr_dma_addr = tptr_buf->map; + hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE; + + return 0; +} + +static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; + + dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, + tptr_buf->buf, tptr_buf->map); +} + +static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); + if (!free_mr->free_mr_wq) { + dev_err(dev, "Create free mr workqueue failed!\n"); + return -ENOMEM; + } + + ret = hns_roce_v1_rsv_lp_qp(hr_dev); + if (ret) { + dev_err(dev, "Reserved loop qp failed(%d)!\n", ret); + flush_workqueue(free_mr->free_mr_wq); + destroy_workqueue(free_mr->free_mr_wq); + } + + return ret; +} + +static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_free_mr *free_mr; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + free_mr = &priv->free_mr; + + flush_workqueue(free_mr->free_mr_wq); + destroy_workqueue(free_mr->free_mr_wq); + + hns_roce_v1_release_lp_qp(hr_dev); +} + /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer @@ -898,6 +1414,38 @@ int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) return ret; } +static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_des_qp *des_qp; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + des_qp = &priv->des_qp; + + des_qp->requeue_flag = 1; + des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp"); + if (!des_qp->qp_wq) { + dev_err(dev, "Create destroy qp workqueue failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v1_priv *priv; + struct hns_roce_des_qp *des_qp; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + des_qp = &priv->des_qp; + + des_qp->requeue_flag = 0; + flush_workqueue(des_qp->qp_wq); + destroy_workqueue(des_qp->qp_wq); +} + void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; @@ -906,12 +1454,11 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG)); - hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG)) | ((u64)le32_to_cpu(roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; @@ -1001,18 +1548,44 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev) goto error_failed_raq_init; } - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); - ret = hns_roce_bt_init(hr_dev); if (ret) { dev_err(dev, "bt init failed!\n"); goto error_failed_bt_init; } + ret = hns_roce_tptr_init(hr_dev); + if (ret) { + dev_err(dev, "tptr init failed!\n"); + goto error_failed_tptr_init; + } + + ret = hns_roce_des_qp_init(hr_dev); + if (ret) { + dev_err(dev, "des qp init failed!\n"); + goto error_failed_des_qp_init; + } + + ret = hns_roce_free_mr_init(hr_dev); + if (ret) { + dev_err(dev, "free mr init failed!\n"); + goto error_failed_free_mr_init; + } + + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); + return 0; +error_failed_free_mr_init: + hns_roce_des_qp_free(hr_dev); + +error_failed_des_qp_init: + hns_roce_tptr_free(hr_dev); + +error_failed_tptr_init: + hns_roce_bt_free(hr_dev); + error_failed_bt_init: - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_raq_free(hr_dev); error_failed_raq_init: @@ -1022,8 +1595,11 @@ error_failed_raq_init: void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { - hns_roce_bt_free(hr_dev); hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); + hns_roce_free_mr_free(hr_dev); + hns_roce_des_qp_free(hr_dev); + hns_roce_tptr_free(hr_dev); + hns_roce_bt_free(hr_dev); hns_roce_raq_free(hr_dev); hns_roce_db_free(hr_dev); } @@ -1061,6 +1637,14 @@ void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) u32 *p; u32 val; + /* + * When mac changed, loopback may fail + * because of smac not equal to dmac. + * We Need to release and create reserved qp again. + */ + if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev)) + dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n"); + p = (u32 *)(&addr[0]); reg_smac_l = *p; roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG + @@ -1293,9 +1877,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, } /* - * Now backwards through the CQ, removing CQ entries - * that match our QP by overwriting them with next entries. - */ + * Now backwards through the CQ, removing CQ entries + * that match our QP by overwriting them with next entries. + */ while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe); if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, @@ -1317,9 +1901,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; /* - * Make sure update of buffer contents is done before - * updating consumer index. - */ + * Make sure update of buffer contents is done before + * updating consumer index. + */ wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); @@ -1339,14 +1923,21 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, dma_addr_t dma_handle, int nent, u32 vector) { struct hns_roce_cq_context *cq_context = NULL; - void __iomem *tptr_addr; + struct hns_roce_buf_list *tptr_buf; + struct hns_roce_v1_priv *priv; + dma_addr_t tptr_dma_addr; + int offset; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + tptr_buf = &priv->tptr_table.tptr_buf; cq_context = mb_buf; memset(cq_context, 0, sizeof(*cq_context)); - tptr_addr = 0; - hr_dev->priv_addr = tptr_addr; - hr_cq->tptr_addr = tptr_addr; + /* Get the tptr for this CQ. */ + offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE; + tptr_dma_addr = tptr_buf->map + offset; + hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset); /* Register cq_context members */ roce_set_field(cq_context->cqc_byte_4, @@ -1390,10 +1981,10 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, - (u64)tptr_addr >> 44); + tptr_dma_addr >> 44); cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12); + cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -1407,7 +1998,7 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_bit(cq_context->cqc_byte_32, CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S, 0); - /*The initial value of cq's ci is 0 */ + /* The initial value of cq's ci is 0 */ roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); @@ -1424,9 +2015,9 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; /* - * flags = 0; Notification Flag = 1, next - * flags = 1; Notification Flag = 0, solocited - */ + * flags = 0; Notification Flag = 1, next + * flags = 1; Notification Flag = 0, solocited + */ doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -1581,10 +2172,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, wq = &(*cur_qp)->sq; if ((*cur_qp)->sq_signal_bits) { /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4, CQE_BYTE_4_WQE_INDEX_M, CQE_BYTE_4_WQE_INDEX_S); @@ -1659,8 +2250,14 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) break; } - if (npolled) + if (npolled) { + *hr_cq->tptr_addr = hr_cq->cons_index & + ((hr_cq->cq_depth << 1) - 1); + + /* Memroy barrier */ + wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); + } spin_unlock_irqrestore(&hr_cq->lock, flags); @@ -1799,12 +2396,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP) return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, HNS_ROCE_CMD_2RST_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP) return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, HNS_ROCE_CMD_2ERR_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) @@ -1814,7 +2411,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, op[cur_state][new_state], - HNS_ROCE_CMD_TIME_CLASS_C); + HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; @@ -2000,11 +2597,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, } /* - *Reset to init - * Mandatory param: - * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS - * Optional param: NA - */ + * Reset to init + * Mandatory param: + * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS + * Optional param: NA + */ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -2172,24 +2769,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, hr_qp->sq_signal_bits); - for (port = 0; port < hr_dev->caps.num_ports; port++) { - smac = (u8 *)hr_dev->dev_addr[port]; - dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n", - smac[0], smac[1], smac[2], smac[3], smac[4], - smac[5]); - if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) && - (dmac[2] == smac[2]) && (dmac[3] == smac[3]) && - (dmac[4] == smac[4]) && (dmac[5] == smac[5])) { - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, - 1); - break; - } - } - - if (hr_dev->loop_idc == 0x1) + port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : + hr_qp->port; + smac = (u8 *)hr_dev->dev_addr[port]; + /* when dmac equals smac or loop_idc is 1, it should loopback */ + if (ether_addr_equal_unaligned(dmac, smac) || + hr_dev->loop_idc == 0x1) roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S, @@ -2509,7 +3096,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, /* Every status migrate must change state */ roce_set_field(context->qpc_bytes_144, QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, - QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state); + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); /* SW pass context to HW */ ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, @@ -2522,9 +3109,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, } /* - * Use rst2init to instead of init2init with drv, - * need to hw to flash RQ HEAD by DB again - */ + * Use rst2init to instead of init2init with drv, + * need to hw to flash RQ HEAD by DB again + */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { /* Memory barrier */ wmb(); @@ -2619,7 +3206,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, HNS_ROCE_CMD_QUERY_QP, - HNS_ROCE_CMD_TIME_CLASS_A); + HNS_ROCE_CMD_TIMEOUT_MSECS); if (!ret) memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); else @@ -2630,8 +3217,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_sqp_context context; + u32 addr; + + mutex_lock(&hr_qp->mutex); + + if (hr_qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + addr = ROCEE_QP1C_CFG0_0_REG + + hr_qp->port * sizeof(struct hns_roce_sqp_context); + context.qp1c_bytes_4 = roce_read(hr_dev, addr); + context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); + context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); + context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); + context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); + context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); + context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); + context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); + context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); + context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + + hr_qp->state = roce_get_field(context.qp1c_bytes_4, + QP1C_BYTES_4_QP_STATE_M, + QP1C_BYTES_4_QP_STATE_S); + qp_attr->qp_state = hr_qp->state; + qp_attr->path_mtu = IB_MTU_256; + qp_attr->path_mig_state = IB_MIG_ARMED; + qp_attr->qkey = QKEY_VAL; + qp_attr->rq_psn = 0; + qp_attr->sq_psn = 0; + qp_attr->dest_qp_num = 1; + qp_attr->qp_access_flags = 6; + + qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20, + QP1C_BYTES_20_PKEY_IDX_M, + QP1C_BYTES_20_PKEY_IDX_S); + qp_attr->port_num = hr_qp->port + 1; + qp_attr->sq_draining = 0; + qp_attr->max_rd_atomic = 0; + qp_attr->max_dest_rd_atomic = 0; + qp_attr->min_rnr_timer = 0; + qp_attr->timeout = 0; + qp_attr->retry_cnt = 0; + qp_attr->rnr_retry = 0; + qp_attr->alt_timeout = 0; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; + qp_attr->cap.max_inline_data = 0; + qp_init_attr->cap = qp_attr->cap; + qp_init_attr->create_flags = 0; + + mutex_unlock(&hr_qp->mutex); + + return 0; +} + +static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -2725,9 +3382,7 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12, QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); - qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1; + qp_attr->port_num = hr_qp->port + 1; qp_attr->sq_draining = 0; qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156, QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, @@ -2767,134 +3422,397 @@ out: return ret; } -static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - int is_user) +int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + return hr_qp->doorbell_qpn <= 1 ? + hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) : + hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); +} + +static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + u32 sdb_issue_ptr, + u32 *sdb_inv_cnt, + u32 *wait_stage) { - u32 sdbinvcnt; - unsigned long end = 0; - u32 sdbinvcnt_val; - u32 sdbsendptr_val; - u32 sdbisusepr_val; - struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = &hr_dev->pdev->dev; + u32 sdb_retry_cnt, old_retry; + u32 sdb_send_ptr, old_send; + u32 success_flags = 0; + u32 cur_cnt, old_cnt; + unsigned long end; + u32 send_ptr; + u32 inv_cnt; + u32 tsp_st; + + if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || + *wait_stage < HNS_ROCE_V1_DB_STAGE1) { + dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n", + hr_qp->qpn, *wait_stage); + return -EINVAL; + } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) { - if (hr_qp->state != IB_QPS_RESET) { - /* - * Set qp to ERR, - * waiting for hw complete processing all dbs - */ - if (hns_roce_v1_qp_modify(hr_dev, NULL, - to_hns_roce_state( - (enum ib_qp_state)hr_qp->state), - HNS_ROCE_QP_STATE_ERR, NULL, - hr_qp)) - dev_err(dev, "modify QP %06lx to ERR failed.\n", - hr_qp->qpn); - - /* Record issued doorbell */ - sdbisusepr_val = roce_read(hr_dev, - ROCEE_SDB_ISSUE_PTR_REG); - /* - * Query db process status, - * until hw process completely - */ - end = msecs_to_jiffies( - HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies; - do { - sdbsendptr_val = roce_read(hr_dev, + /* Calculate the total timeout for the entire verification process */ + end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies; + + if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) { + /* Query db process status, until hw process completely */ + sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); + while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr, + ROCEE_SDB_PTR_CMP_BITS)) { + if (!time_before(jiffies, end)) { + dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n", + hr_qp->qpn, sdb_issue_ptr, + sdb_send_ptr); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - if (!time_before(jiffies, end)) { - dev_err(dev, "destroy qp(0x%lx) timeout!!!", - hr_qp->qpn); - break; - } - } while ((short)(roce_get_field(sdbsendptr_val, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) - - roce_get_field(sdbisusepr_val, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) - ) < 0); + } - /* Get list pointer */ - sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + if (roce_get_field(sdb_issue_ptr, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == + roce_get_field(sdb_send_ptr, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { + old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); + old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - /* Query db's list status, until hw reversal */ do { - sdbinvcnt_val = roce_read(hr_dev, - ROCEE_SDB_INV_CNT_REG); + tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); + if (roce_get_bit(tsp_st, + ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { + *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; + return 0; + } + if (!time_before(jiffies, end)) { - dev_err(dev, "destroy qp(0x%lx) timeout!!!", - hr_qp->qpn); - dev_err(dev, "SdbInvCnt = 0x%x\n", - sdbinvcnt_val); - break; + dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" + "issue 0x%x send 0x%x.\n", + hr_qp->qpn, sdb_issue_ptr, + sdb_send_ptr); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + + sdb_send_ptr = roce_read(hr_dev, + ROCEE_SDB_SEND_PTR_REG); + sdb_retry_cnt = roce_read(hr_dev, + ROCEE_SDB_RETRY_CNT_REG); + cur_cnt = roce_get_field(sdb_send_ptr, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (!roce_get_bit(tsp_st, + ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { + old_cnt = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(old_retry, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + success_flags = 1; + } else { + old_cnt = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + success_flags = 1; + else { + send_ptr = roce_get_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + roce_set_field(old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, + send_ptr); + } } - } while ((short)(roce_get_field(sdbinvcnt_val, - ROCEE_SDB_INV_CNT_SDB_INV_CNT_M, - ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) - - (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0); - - /* Modify qp to reset before destroying qp */ - if (hns_roce_v1_qp_modify(hr_dev, NULL, - to_hns_roce_state( - (enum ib_qp_state)hr_qp->state), - HNS_ROCE_QP_STATE_RST, NULL, hr_qp)) - dev_err(dev, "modify QP %06lx to RESET failed.\n", - hr_qp->qpn); + } while (!success_flags); } + + *wait_stage = HNS_ROCE_V1_DB_STAGE2; + + /* Get list pointer */ + *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n", + hr_qp->qpn, *sdb_inv_cnt); + } + + if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) { + /* Query db's list status, until hw reversal */ + inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + while (roce_hw_index_cmp_lt(inv_cnt, + *sdb_inv_cnt + SDB_INV_CNT_OFFSET, + ROCEE_SDB_CNT_CMP_BITS)) { + if (!time_before(jiffies, end)) { + dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n", + hr_qp->qpn, inv_cnt); + return 0; + } + + msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); + inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + } + + *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; + } + + return 0; +} + +static int check_qp_reset_state(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_qp_work *qp_work_entry, + int *is_timeout) +{ + struct device *dev = &hr_dev->pdev->dev; + u32 sdb_issue_ptr; + int ret; + + if (hr_qp->state != IB_QPS_RESET) { + /* Set qp to ERR, waiting for hw complete processing all dbs */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_ERR); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n", + hr_qp->qpn); + return ret; + } + + /* Record issued doorbell */ + sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG); + qp_work_entry->sdb_issue_ptr = sdb_issue_ptr; + qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1; + + /* Query db process status, until hw process completely */ + ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr, + &qp_work_entry->sdb_inv_cnt, + &qp_work_entry->db_wait_stage); + if (ret) { + dev_err(dev, "Check QP(0x%lx) db process status failed!\n", + hr_qp->qpn); + return ret; + } + + if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) { + qp_work_entry->sche_cnt = 0; + *is_timeout = 1; + return 0; + } + + /* Modify qp to reset before destroying qp */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_RESET); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", + hr_qp->qpn); + return ret; + } + } + + return 0; +} + +static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) +{ + struct hns_roce_qp_work *qp_work_entry; + struct hns_roce_v1_priv *priv; + struct hns_roce_dev *hr_dev; + struct hns_roce_qp *hr_qp; + struct device *dev; + int ret; + + qp_work_entry = container_of(work, struct hns_roce_qp_work, work); + hr_dev = to_hr_dev(qp_work_entry->ib_dev); + dev = &hr_dev->pdev->dev; + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + hr_qp = qp_work_entry->qp; + + dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn); + + qp_work_entry->sche_cnt++; + + /* Query db process status, until hw process completely */ + ret = check_qp_db_process_status(hr_dev, hr_qp, + qp_work_entry->sdb_issue_ptr, + &qp_work_entry->sdb_inv_cnt, + &qp_work_entry->db_wait_stage); + if (ret) { + dev_err(dev, "Check QP(0x%lx) db process status failed!\n", + hr_qp->qpn); + return; + } + + if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK && + priv->des_qp.requeue_flag) { + queue_work(priv->des_qp.qp_wq, work); + return; + } + + /* Modify qp to reset before destroying qp */ + ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, + IB_QPS_RESET); + if (ret) { + dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn); + return; + } + + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); + + if (hr_qp->ibqp.qp_type == IB_QPT_RC) { + /* RC QP, release QPN */ + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + kfree(hr_qp); + } else + kfree(hr_to_hr_sqp(hr_qp)); + + kfree(qp_work_entry); + + dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn); +} + +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_work qp_work_entry; + struct hns_roce_qp_work *qp_work; + struct hns_roce_v1_priv *priv; + struct hns_roce_cq *send_cq, *recv_cq; + int is_user = !!ibqp->pd->uobject; + int is_timeout = 0; + int ret; + + ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout); + if (ret) { + dev_err(dev, "QP reset state check failed(%d)!\n", ret); + return ret; } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); } - - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_unlock_cqs(send_cq, recv_cq); - hns_roce_qp_free(hr_dev, hr_qp); + if (!is_timeout) { + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); - /* Not special_QP, free their QPN */ - if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || - (hr_qp->ibqp.qp_type == IB_QPT_UC) || - (hr_qp->ibqp.qp_type == IB_QPT_UD)) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + /* RC QP, release QPN */ + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + } hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) { + if (is_user) ib_umem_release(hr_qp->umem); - } else { + else { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } + + if (!is_timeout) { + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + kfree(hr_qp); + else + kfree(hr_to_hr_sqp(hr_qp)); + } else { + qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL); + if (!qp_work) + return -ENOMEM; + + INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn); + qp_work->ib_dev = &hr_dev->ib_dev; + qp_work->qp = hr_qp; + qp_work->db_wait_stage = qp_work_entry.db_wait_stage; + qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr; + qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; + qp_work->sche_cnt = qp_work_entry.sche_cnt; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + queue_work(priv->des_qp.qp_wq, &qp_work->work); + dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); + } + + return 0; } -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + struct device *dev = &hr_dev->pdev->dev; + u32 cqe_cnt_ori; + u32 cqe_cnt_cur; + u32 cq_buf_size; + int wait_time = 0; + int ret = 0; - hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + hns_roce_free_cq(hr_dev, hr_cq); - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + /* + * Before freeing cq buffer, we need to ensure that the outstanding CQE + * have been written by checking the CQE counter. + */ + cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); + while (1) { + if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) & + HNS_ROCE_CQE_WCMD_EMPTY_BIT) + break; - return 0; + cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); + if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT) + break; + + msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS); + if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) { + dev_warn(dev, "Destroy cq 0x%lx timeout!\n", + hr_cq->cqn); + ret = -ETIMEDOUT; + break; + } + wait_time++; + } + + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + + if (ibcq->uobject) + ib_umem_release(hr_cq->umem); + else { + /* Free the buff of stored cq */ + cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; + hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + } + + kfree(hr_cq); + + return ret; } struct hns_roce_v1_priv hr_v1_priv; @@ -2917,5 +3835,7 @@ struct hns_roce_hw hns_roce_hw_v1 = { .post_recv = hns_roce_v1_post_recv, .req_notify_cq = hns_roce_v1_req_notify_cq, .poll_cq = hns_roce_v1_poll_cq, + .dereg_mr = hns_roce_v1_dereg_mr, + .destroy_cq = hns_roce_v1_destroy_cq, .priv = &hr_v1_priv, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 539b0a3b92b0..b213b5e6fef1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -58,6 +58,7 @@ #define HNS_ROCE_V1_PHY_UAR_NUM 8 #define HNS_ROCE_V1_GID_NUM 16 +#define HNS_ROCE_V1_RESV_QP 8 #define HNS_ROCE_V1_NUM_COMP_EQE 0x8000 #define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400 @@ -102,8 +103,22 @@ #define HNS_ROCE_V1_EXT_ODB_ALFUL \ (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_DB_WAIT_OK 0 +#define HNS_ROCE_V1_DB_STAGE1 1 +#define HNS_ROCE_V1_DB_STAGE2 2 +#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000 +#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20 +#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 +#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 +#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 +#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20 + #define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17) +#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2 +#define HNS_ROCE_V1_TPTR_BUF_SIZE \ + (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM) + #define HNS_ROCE_ODB_POLL_MODE 0 #define HNS_ROCE_SDB_NORMAL_MODE 0 @@ -140,6 +155,7 @@ #define SQ_PSN_SHIFT 8 #define QKEY_VAL 0x80010000 #define SDB_INV_CNT_OFFSET 8 +#define SDB_ST_CMP_VAL 8 struct hns_roce_cq_context { u32 cqc_byte_4; @@ -436,6 +452,8 @@ struct hns_roce_ud_send_wqe { #define UD_SEND_WQE_U32_8_DMAC_5_M \ (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S) +#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22 + #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \ (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S) @@ -480,13 +498,17 @@ struct hns_roce_sqp_context { u32 qp1c_bytes_12; u32 qp1c_bytes_16; u32 qp1c_bytes_20; - u32 qp1c_bytes_28; u32 cur_rq_wqe_ba_l; + u32 qp1c_bytes_28; u32 qp1c_bytes_32; u32 cur_sq_wqe_ba_l; u32 qp1c_bytes_40; }; +#define QP1C_BYTES_4_QP_STATE_S 0 +#define QP1C_BYTES_4_QP_STATE_M \ + (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S) + #define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8 #define QP1C_BYTES_4_SQ_WQE_SHIFT_M \ (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S) @@ -952,6 +974,10 @@ struct hns_roce_sq_db { #define SQ_DOORBELL_U32_4_SQ_HEAD_M \ (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S) +#define SQ_DOORBELL_U32_4_SL_S 16 +#define SQ_DOORBELL_U32_4_SL_M \ + (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S) + #define SQ_DOORBELL_U32_4_PORT_S 18 #define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S) @@ -979,12 +1005,58 @@ struct hns_roce_bt_table { struct hns_roce_buf_list cqc_buf; }; +struct hns_roce_tptr_table { + struct hns_roce_buf_list tptr_buf; +}; + +struct hns_roce_qp_work { + struct work_struct work; + struct ib_device *ib_dev; + struct hns_roce_qp *qp; + u32 db_wait_stage; + u32 sdb_issue_ptr; + u32 sdb_inv_cnt; + u32 sche_cnt; +}; + +struct hns_roce_des_qp { + struct workqueue_struct *qp_wq; + int requeue_flag; +}; + +struct hns_roce_mr_free_work { + struct work_struct work; + struct ib_device *ib_dev; + struct completion *comp; + int comp_flag; + void *mr; +}; + +struct hns_roce_recreate_lp_qp_work { + struct work_struct work; + struct ib_device *ib_dev; + struct completion *comp; + int comp_flag; +}; + +struct hns_roce_free_mr { + struct workqueue_struct *free_mr_wq; + struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP]; + struct hns_roce_cq *mr_free_cq; + struct hns_roce_pd *mr_free_pd; +}; + struct hns_roce_v1_priv { struct hns_roce_db_table db_table; struct hns_roce_raq_table raq_table; struct hns_roce_bt_table bt_table; + struct hns_roce_tptr_table tptr_table; + struct hns_roce_des_qp des_qp; + struct hns_roce_free_mr free_mr; }; int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); +int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 764e35a54457..4953d9cb83a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -35,52 +35,13 @@ #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "hns_roce_common.h" #include "hns_roce_device.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #include "hns_roce_hem.h" /** - * hns_roce_addrconf_ifid_eui48 - Get default gid. - * @eui: eui. - * @vlan_id: gid - * @dev: net device - * Description: - * MAC convert to GID - * gid[0..7] = fe80 0000 0000 0000 - * gid[8] = mac[0] ^ 2 - * gid[9] = mac[1] - * gid[10] = mac[2] - * gid[11] = ff (VLAN ID high byte (4 MS bits)) - * gid[12] = fe (VLAN ID low byte) - * gid[13] = mac[3] - * gid[14] = mac[4] - * gid[15] = mac[5] - */ -static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, - struct net_device *dev) -{ - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - if (vlan_id < 0x1000) { - eui[3] = vlan_id >> 8; - eui[4] = vlan_id & 0xff; - } else { - eui[3] = 0xff; - eui[4] = 0xfe; - } - eui[0] ^= 2; -} - -static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid) -{ - memset(gid, 0, sizeof(*gid)); - gid->raw[0] = 0xFE; - gid->raw[1] = 0x80; - hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); -} - -/** * hns_get_gid_index - Get gid index. * @hr_dev: pointer to structure hns_roce_dev. * @port: port, value range: 0 ~ MAX @@ -96,30 +57,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) return gid_index * hr_dev->caps.num_ports + port; } -static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid) -{ - struct device *dev = &hr_dev->pdev->dev; - u8 gid_idx = 0; - - if (gid_index >= hr_dev->caps.gid_table_len[port]) { - dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n", - gid_index, port, hr_dev->caps.gid_table_len[port] - 1); - return -EINVAL; - } - - gid_idx = hns_get_gid_index(hr_dev, port, gid_index); - - if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid))) - return -EINVAL; - - memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid)); - - hr_dev->hw->set_gid(hr_dev, port, gid_index, gid); - - return 0; -} - static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -135,27 +72,44 @@ static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu) +static int hns_roce_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context) { - u8 phy_port = hr_dev->iboe.phy_port[port]; - enum ib_mtu tmp; + struct hns_roce_dev *hr_dev = to_hr_dev(device); + u8 port = port_num - 1; + unsigned long flags; + + if (port >= hr_dev->caps.num_ports) + return -EINVAL; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); - tmp = iboe_get_mtu(mtu); - if (!tmp) - tmp = IB_MTU_256; + hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid); - hr_dev->hw->set_mtu(hr_dev, phy_port, tmp); + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; } -static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port) +static int hns_roce_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, void **context) { - struct ib_event event; + struct hns_roce_dev *hr_dev = to_hr_dev(device); + union ib_gid zgid = { {0} }; + u8 port = port_num - 1; + unsigned long flags; + + if (port >= hr_dev->caps.num_ports) + return -EINVAL; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); - /* Refresh gid in ib_cache */ - event.device = &hr_dev->ib_dev; - event.element.port_num = port + 1; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + hr_dev->hw->set_gid(hr_dev, port, index, &zgid); + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; } static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, @@ -163,9 +117,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, { struct device *dev = &hr_dev->pdev->dev; struct net_device *netdev; - unsigned long flags; - union ib_gid gid; - int ret = 0; netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { @@ -173,7 +124,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, return -ENODEV; } - spin_lock_irqsave(&hr_dev->iboe.lock, flags); + spin_lock_bh(&hr_dev->iboe.lock); switch (event) { case NETDEV_UP: @@ -181,23 +132,19 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, case NETDEV_REGISTER: case NETDEV_CHANGEADDR: hns_roce_set_mac(hr_dev, port, netdev->dev_addr); - hns_roce_make_default_gid(netdev, &gid); - ret = hns_roce_set_gid(hr_dev, port, 0, &gid); - if (!ret) - hns_roce_update_gids(hr_dev, port); break; case NETDEV_DOWN: /* - * In v1 engine, only support all ports closed together. - */ + * In v1 engine, only support all ports closed together. + */ break; default: dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event)); break; } - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return ret; + spin_unlock_bh(&hr_dev->iboe.lock); + return 0; } static int hns_roce_netdev_event(struct notifier_block *self, @@ -224,118 +171,17 @@ static int hns_roce_netdev_event(struct notifier_block *self, return NOTIFY_DONE; } -static void hns_roce_addr_event(int event, struct net_device *event_netdev, - struct hns_roce_dev *hr_dev, union ib_gid *gid) +static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) { - struct hns_roce_ib_iboe *iboe = NULL; - int gid_table_len = 0; - unsigned long flags; - union ib_gid zgid; - u8 gid_idx = 0; - u8 port = 0; - int i = 0; - int free; - struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? - rdma_vlan_dev_real_dev(event_netdev) : - event_netdev; - - if (event != NETDEV_UP && event != NETDEV_DOWN) - return; - - iboe = &hr_dev->iboe; - while (port < hr_dev->caps.num_ports) { - if (real_dev == iboe->netdevs[port]) - break; - port++; - } - - if (port >= hr_dev->caps.num_ports) { - dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n"); - return; - } - - memset(zgid.raw, 0, sizeof(zgid.raw)); - free = -1; - gid_table_len = hr_dev->caps.gid_table_len[port]; - - spin_lock_irqsave(&hr_dev->iboe.lock, flags); - - for (i = 0; i < gid_table_len; i++) { - gid_idx = hns_get_gid_index(hr_dev, port, i); - if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw, - sizeof(gid->raw))) - break; - if (free < 0 && !memcmp(zgid.raw, - iboe->gid_table[gid_idx].raw, sizeof(zgid.raw))) - free = i; - } - - if (i >= gid_table_len) { - if (free < 0) { - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_dbg(&hr_dev->pdev->dev, - "gid_index overflow, port(%d)\n", port); - return; - } - if (!hns_roce_set_gid(hr_dev, port, free, gid)) - hns_roce_update_gids(hr_dev, port); - } else if (event == NETDEV_DOWN) { - if (!hns_roce_set_gid(hr_dev, port, i, &zgid)) - hns_roce_update_gids(hr_dev, port); - } - - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); -} - -static int hns_roce_inet_event(struct notifier_block *self, unsigned long event, - void *ptr) -{ - struct in_ifaddr *ifa = ptr; - struct hns_roce_dev *hr_dev; - struct net_device *dev = ifa->ifa_dev->dev; - union ib_gid gid; - - ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - - hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet); - - hns_roce_addr_event(event, dev, hr_dev, &gid); - - return NOTIFY_DONE; -} - -static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev) -{ - struct in_ifaddr *ifa_list = NULL; - union ib_gid gid = {{0} }; - u32 ipaddr = 0; - int index = 0; - int ret = 0; - u8 i = 0; + u8 i; for (i = 0; i < hr_dev->caps.num_ports; i++) { - hns_roce_set_mtu(hr_dev, i, - ib_mtu_enum_to_int(hr_dev->caps.max_mtu)); + hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], + hr_dev->caps.max_mtu); hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); - - if (hr_dev->iboe.netdevs[i]->ip_ptr) { - ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list; - index = 1; - while (ifa_list) { - ipaddr = ifa_list->ifa_address; - ipv6_addr_set_v4mapped(ipaddr, - (struct in6_addr *)&gid); - ret = hns_roce_set_gid(hr_dev, i, index, &gid); - if (ret) - break; - index++; - ifa_list = ifa_list->ifa_next; - } - hns_roce_update_gids(hr_dev, i); - } } - return ret; + return 0; } static int hns_roce_query_device(struct ib_device *ib_dev, @@ -444,31 +290,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index, union ib_gid *gid) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = &hr_dev->pdev->dev; - u8 gid_idx = 0; - u8 port; - - if (port_num < 1 || port_num > hr_dev->caps.num_ports || - index >= hr_dev->caps.gid_table_len[port_num - 1]) { - dev_err(dev, - "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n", - port_num, index, hr_dev->caps.num_ports, - hr_dev->caps.gid_table_len[port_num - 1] - 1); - return -EINVAL; - } - - port = port_num - 1; - gid_idx = hns_get_gid_index(hr_dev, port, index); - if (gid_idx >= HNS_ROCE_MAX_GID_NUM) { - dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n", - port_num, index, HNS_ROCE_MAX_GID_NUM); - return -EINVAL; - } - - memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw, - HNS_ROCE_GID_SIZE); - return 0; } @@ -549,6 +370,8 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) static int hns_roce_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { + struct hns_roce_dev *hr_dev = to_hr_dev(context->device); + if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) return -EINVAL; @@ -558,10 +381,15 @@ static int hns_roce_mmap(struct ib_ucontext *context, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - - } else { + } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + /* vm_pgoff: 1 -- TPTR */ + if (io_remap_pfn_range(vma, vma->vm_start, + hr_dev->tptr_dma_addr >> PAGE_SHIFT, + hr_dev->tptr_size, + vma->vm_page_prot)) + return -EAGAIN; + } else return -EINVAL; - } return 0; } @@ -605,7 +433,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) spin_lock_init(&iboe->lock); ib_dev = &hr_dev->ib_dev; - strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX); + strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX); ib_dev->owner = THIS_MODULE; ib_dev->node_type = RDMA_NODE_IB_CA; @@ -639,6 +467,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->get_link_layer = hns_roce_get_link_layer; ib_dev->get_netdev = hns_roce_get_netdev; ib_dev->query_gid = hns_roce_query_gid; + ib_dev->add_gid = hns_roce_add_gid; + ib_dev->del_gid = hns_roce_del_gid; ib_dev->query_pkey = hns_roce_query_pkey; ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; @@ -681,32 +511,22 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) return ret; } - ret = hns_roce_setup_mtu_gids(hr_dev); + ret = hns_roce_setup_mtu_mac(hr_dev); if (ret) { - dev_err(dev, "roce_setup_mtu_gids failed!\n"); - goto error_failed_setup_mtu_gids; + dev_err(dev, "setup_mtu_mac failed!\n"); + goto error_failed_setup_mtu_mac; } iboe->nb.notifier_call = hns_roce_netdev_event; ret = register_netdevice_notifier(&iboe->nb); if (ret) { dev_err(dev, "register_netdevice_notifier failed!\n"); - goto error_failed_setup_mtu_gids; - } - - iboe->nb_inet.notifier_call = hns_roce_inet_event; - ret = register_inetaddr_notifier(&iboe->nb_inet); - if (ret) { - dev_err(dev, "register inet addr notifier failed!\n"); - goto error_failed_register_inetaddr_notifier; + goto error_failed_setup_mtu_mac; } return 0; -error_failed_register_inetaddr_notifier: - unregister_netdevice_notifier(&iboe->nb); - -error_failed_setup_mtu_gids: +error_failed_setup_mtu_mac: ib_unregister_device(ib_dev); return ret; @@ -940,10 +760,10 @@ err_unmap_mtt: } /** -* hns_roce_setup_hca - setup host channel adapter -* @hr_dev: pointer to hns roce device -* Return : int -*/ + * hns_roce_setup_hca - setup host channel adapter + * @hr_dev: pointer to hns roce device + * Return : int + */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { int ret; @@ -1008,11 +828,11 @@ err_uar_table_free: } /** -* hns_roce_probe - RoCE driver entrance -* @pdev: pointer to platform device -* Return : int -* -*/ + * hns_roce_probe - RoCE driver entrance + * @pdev: pointer to platform device + * Return : int + * + */ static int hns_roce_probe(struct platform_device *pdev) { int ret; @@ -1023,9 +843,6 @@ static int hns_roce_probe(struct platform_device *pdev) if (!hr_dev) return -ENOMEM; - memset((u8 *)hr_dev + sizeof(struct ib_device), 0, - sizeof(struct hns_roce_dev) - sizeof(struct ib_device)); - hr_dev->pdev = pdev; platform_set_drvdata(pdev, hr_dev); @@ -1125,9 +942,9 @@ error_failed_get_cfg: } /** -* hns_roce_remove - remove RoCE device -* @pdev: pointer to platform device -*/ + * hns_roce_remove - remove RoCE device + * @pdev: pointer to platform device + */ static int hns_roce_remove(struct platform_device *pdev) { struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index fb87883ead34..4139abee3b54 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -42,7 +42,7 @@ static u32 hw_index_to_key(unsigned long ind) return (u32)(ind >> 24) | (ind << 8); } -static unsigned long key_to_hw_index(u32 key) +unsigned long key_to_hw_index(u32 key) { return (key << 24) | (key >> 8); } @@ -53,16 +53,16 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, HNS_ROCE_CMD_SW2HW_MPT, - HNS_ROCE_CMD_TIME_CLASS_B); + HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, +int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, - HNS_ROCE_CMD_TIME_CLASS_B); + HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, @@ -137,11 +137,13 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL); - if (!buddy->bits[i]) - goto err_out_free; - - bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); + buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | + __GFP_NOWARN); + if (!buddy->bits[i]) { + buddy->bits[i] = vzalloc(s * sizeof(long)); + if (!buddy->bits[i]) + goto err_out_free; + } } set_bit(0, buddy->bits[buddy->max_order]); @@ -151,7 +153,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) err_out_free: for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -164,7 +166,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); @@ -287,7 +289,7 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, } hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, - key_to_hw_index(mr->key)); + key_to_hw_index(mr->key), BITMAP_NO_RR); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, @@ -605,13 +607,20 @@ err_free: int hns_roce_dereg_mr(struct ib_mr *ibmr) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); + int ret = 0; - hns_roce_mr_free(to_hr_dev(ibmr->device), mr); - if (mr->umem) - ib_umem_release(mr->umem); + if (hr_dev->hw->dereg_mr) { + ret = hr_dev->hw->dereg_mr(hr_dev, mr); + } else { + hns_roce_mr_free(hr_dev, mr); - kfree(mr); + if (mr->umem) + ib_umem_release(mr->umem); - return 0; + kfree(mr); + } + + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 05db7d59812a..a64500fa1145 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -40,7 +40,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) { - hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn); + hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR); } int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev) @@ -121,7 +121,8 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { - hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index); + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index, + BITMAP_NO_RR); } int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e86dd8d06777..f036f32f15d3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -37,7 +37,7 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include "hns_roce_user.h" +#include <rdma/hns-abi.h> #define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) @@ -250,7 +250,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, if (base_qpn < SQP_NUM) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); + hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR); } static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h deleted file mode 100644 index a28f761a9f65..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_user.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Hisilicon Limited. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _HNS_ROCE_USER_H -#define _HNS_ROCE_USER_H - -struct hns_roce_ib_create_cq { - __u64 buf_addr; -}; - -struct hns_roce_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; - __u8 log_sq_bb_count; - __u8 log_sq_stride; - __u8 sq_no_prefetch; - __u8 reserved[5]; -}; - -struct hns_roce_ib_alloc_ucontext_resp { - __u32 qp_tab_size; -}; - -#endif /*_HNS_ROCE_USER_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8ec09e470f84..da2eb5a281fa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -112,9 +112,12 @@ #define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) -#define IW_CFG_FPM_QP_COUNT 32768 -#define I40IW_MAX_PAGES_PER_FMR 512 -#define I40IW_MIN_PAGES_PER_FMR 1 +#define IW_CFG_FPM_QP_COUNT 32768 +#define I40IW_MAX_PAGES_PER_FMR 512 +#define I40IW_MIN_PAGES_PER_FMR 1 +#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2 +#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3 +#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 @@ -210,6 +213,12 @@ struct i40iw_msix_vector { u32 ceq_id; }; +struct l2params_work { + struct work_struct work; + struct i40iw_device *iwdev; + struct i40iw_l2params l2params; +}; + #define I40IW_MSIX_TABLE_SIZE 65 struct virtchnl_work { @@ -227,6 +236,7 @@ struct i40iw_device { struct net_device *netdev; wait_queue_head_t vchnl_waitq; struct i40iw_sc_dev sc_dev; + struct i40iw_sc_vsi vsi; struct i40iw_handler *hdl; struct i40e_info *ldev; struct i40e_client *client; @@ -280,7 +290,6 @@ struct i40iw_device { u32 sd_type; struct workqueue_struct *param_wq; atomic_t params_busy; - u32 mss; enum init_completion_state init_state; u16 mac_ip_table_idx; atomic_t vchnl_msgs; @@ -297,6 +306,14 @@ struct i40iw_device { u32 mr_stagmask; u32 mpa_version; bool dcb; + bool closing; + bool reset; + u32 used_pds; + u32 used_cqs; + u32 used_mrs; + u32 used_qps; + wait_queue_head_t close_wq; + atomic64_t use_count; }; struct i40iw_ib_device { @@ -498,7 +515,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev); int i40iw_register_rdma_device(struct i40iw_device *iwdev); void i40iw_port_ibevent(struct i40iw_device *iwdev); -int i40iw_cm_disconn(struct i40iw_qp *); +void i40iw_cm_disconn(struct i40iw_qp *iwqp); void i40iw_cm_disconn_worker(void *); int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *, struct sk_buff *); @@ -508,20 +525,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev, u8 *mac_addr, u8 *mac_index); int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); +void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq); void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev); void i40iw_add_pdusecount(struct i40iw_pd *iwpd); +void i40iw_rem_devusecount(struct i40iw_device *iwdev); +void i40iw_add_devusecount(struct i40iw_device *iwdev); void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, struct i40iw_modify_qp_info *info, bool wait); +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, + struct i40iw_sc_qp *qp, + bool suspend); enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, enum i40iw_quad_entry_type etype, enum i40iw_quad_hash_manage_type mtype, void *cmnode, bool wait); -void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf); -void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp); +void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); +void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); void i40iw_free_qp_resources(struct i40iw_device *iwdev, struct i40iw_qp *iwqp, u32 qp_num); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 85637696f6e9..95a0586a4da8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -68,13 +68,13 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 - * @dev: FPK device + * @vsi: pointer to vsi structure * @buf: puda buffer to free */ -void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp) +void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp; - struct i40iw_puda_rsrc *ilq = dev->ilq; + struct i40iw_puda_rsrc *ilq = vsi->ilq; if (!atomic_dec_return(&buf->refcount)) i40iw_puda_ret_bufpool(ilq, buf); @@ -221,6 +221,7 @@ static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node, memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr)); cm_info->loc_port = cm_node->loc_port; cm_info->rem_port = cm_node->rem_port; + cm_info->user_pri = cm_node->user_pri; } /** @@ -271,6 +272,7 @@ static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node, event.provider_data = (void *)cm_node; event.private_data = (void *)cm_node->pdata_buf; event.private_data_len = (u8)cm_node->pdata.size; + event.ird = cm_node->ird_size; break; case IW_CM_EVENT_CONNECT_REPLY: i40iw_get_cmevent_info(cm_node, cm_id, &event); @@ -335,13 +337,13 @@ static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node, */ static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node) { - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_device *iwdev = cm_node->iwdev; struct i40iw_timer_entry *send_entry; send_entry = cm_node->send_entry; if (send_entry) { cm_node->send_entry = NULL; - i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf); + i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf); kfree(send_entry); atomic_dec(&cm_node->ref_count); } @@ -360,15 +362,6 @@ static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } -static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node) -{ - if ((cm_node->rem_mac[0] == 0x0) && - (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) || - ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43))))) - return true; - return false; -} - /** * i40iw_form_cm_frame - get a free packet and build frame * @cm_node: connection's node ionfo to use in frame @@ -384,7 +377,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, u8 flags) { struct i40iw_puda_buf *sqbuf; - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; @@ -396,8 +389,9 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; + u16 vtag; - sqbuf = i40iw_puda_get_bufpool(dev->ilq); + sqbuf = i40iw_puda_get_bufpool(vsi->ilq); if (!sqbuf) return NULL; buf = sqbuf->mem.va; @@ -408,11 +402,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, if (hdr) hdr_len = hdr->size; - if (pdata) { + if (pdata) pd_len = pdata->size; - if (!is_remote_ne020_or_chelsio(cm_node)) - pd_len += MPA_ZERO_PAD_LEN; - } if (cm_node->vlan_id < VLAN_TAG_PRESENT) eth_hlen += 4; @@ -445,7 +436,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_source, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_TAG_PRESENT) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); - ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id); + vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; + ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP); } else { @@ -454,7 +446,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, iph->version = IPVERSION; iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ - iph->tos = 0; + iph->tos = cm_node->tos; iph->tot_len = htons(packetsize); iph->id = htons(++cm_node->tcp_cntxt.loc_id); @@ -474,13 +466,15 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_source, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_TAG_PRESENT) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); - ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id); + vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; + ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6); } else { ethh->h_proto = htons(ETH_P_IPV6); } ip6h->version = 6; - ip6h->flow_lbl[0] = 0; + ip6h->priority = cm_node->tos >> 4; + ip6h->flow_lbl[0] = cm_node->tos << 4; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->payload_len = htons(packetsize - sizeof(*ip6h)); @@ -1065,7 +1059,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, int send_retrans, int close_when_complete) { - struct i40iw_sc_dev *dev = cm_node->dev; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; struct i40iw_cm_core *cm_core = cm_node->cm_core; struct i40iw_timer_entry *new_send; int ret = 0; @@ -1074,7 +1068,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { - i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf); + i40iw_free_sqbuf(vsi, (void *)sqbuf); return -ENOMEM; } new_send->retrycount = I40IW_DEFAULT_RETRYS; @@ -1089,7 +1083,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { kfree(new_send); - i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf); + i40iw_free_sqbuf(vsi, (void *)sqbuf); i40iw_pr_err("already close entry\n"); return -EINVAL; } @@ -1104,7 +1098,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node, new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT; atomic_inc(&sqbuf->refcount); - i40iw_puda_send_buf(dev->ilq, sqbuf); + i40iw_puda_send_buf(vsi->ilq, sqbuf); if (!send_retrans) { i40iw_cleanup_retrans_entry(cm_node); if (close_when_complete) @@ -1201,6 +1195,7 @@ static void i40iw_cm_timer_tick(unsigned long pass) struct i40iw_cm_node *cm_node; struct i40iw_timer_entry *send_entry, *close_entry; struct list_head *list_core_temp; + struct i40iw_sc_vsi *vsi; struct list_head *list_node; struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass; u32 settimer = 0; @@ -1276,9 +1271,10 @@ static void i40iw_cm_timer_tick(unsigned long pass) cm_node->cm_core->stats_pkt_retrans++; spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + vsi = &cm_node->iwdev->vsi; dev = cm_node->dev; atomic_inc(&send_entry->sqbuf->refcount); - i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf); + i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; @@ -1379,10 +1375,11 @@ int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack) static void i40iw_send_ack(struct i40iw_cm_node *cm_node) { struct i40iw_puda_buf *sqbuf; + struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi; sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK); if (sqbuf) - i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf); + i40iw_puda_send_buf(vsi->ilq, sqbuf); else i40iw_pr_err("no sqbuf\n"); } @@ -1564,9 +1561,15 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); cm_info->vlan_id = child_listen_node->vlan_id; - ret = i40iw_manage_qhash(iwdev, cm_info, - I40IW_QHASH_TYPE_TCP_SYN, - I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false); + if (child_listen_node->qhash_set) { + ret = i40iw_manage_qhash(iwdev, cm_info, + I40IW_QHASH_TYPE_TCP_SYN, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, false); + child_listen_node->qhash_set = false; + } else { + ret = I40IW_SUCCESS; + } i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n", @@ -1591,9 +1594,10 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { struct net_device *ip_dev = NULL; -#if IS_ENABLED(CONFIG_IPV6) struct in6_addr laddr6; + if (!IS_ENABLED(CONFIG_IPV6)) + return NULL; i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) *vlan_id = I40IW_NO_VLAN; @@ -1610,7 +1614,6 @@ static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *ma } } rcu_read_unlock(); -#endif return ip_dev; } @@ -1646,7 +1649,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, { struct net_device *ip_dev; struct inet6_dev *idev; - struct inet6_ifaddr *ifp; + struct inet6_ifaddr *ifp, *tmp; enum i40iw_status_code ret = 0; struct i40iw_cm_listener *child_listen_node; unsigned long flags; @@ -1661,7 +1664,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, i40iw_pr_err("idev == NULL\n"); break; } - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI6, vlan_id=%d, MAC=%pM\n", @@ -1675,7 +1678,6 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, "Allocating child listener %p\n", child_listen_node); if (!child_listen_node) { - i40iw_pr_err("listener memory allocation\n"); ret = I40IW_ERR_NO_MEMORY; goto exit; } @@ -1695,6 +1697,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true); if (!ret) { + child_listen_node->qhash_set = true; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_add(&child_listen_node->child_listen_list, &cm_parent_listen_node->child_listen_list); @@ -1751,7 +1754,6 @@ static enum i40iw_status_code i40iw_add_mqh_4( "Allocating child listener %p\n", child_listen_node); if (!child_listen_node) { - i40iw_pr_err("listener memory allocation\n"); in_dev_put(idev); ret = I40IW_ERR_NO_MEMORY; goto exit; @@ -1773,6 +1775,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( NULL, true); if (!ret) { + child_listen_node->qhash_set = true; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_add(&child_listen_node->child_listen_list, &cm_parent_listen_node->child_listen_list); @@ -1880,6 +1883,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, nfo.loc_port = listener->loc_port; nfo.ipv4 = listener->ipv4; nfo.vlan_id = listener->vlan_id; + nfo.user_pri = listener->user_pri; if (!list_empty(&listener->child_listen_list)) { i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener); @@ -2138,6 +2142,20 @@ static struct i40iw_cm_node *i40iw_make_cm_node( /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; + if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb) + cm_node->vlan_id = 0; + cm_node->tos = cm_info->tos; + cm_node->user_pri = cm_info->user_pri; + if (listener) { + if (listener->tos != cm_info->tos) + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, + "application TOS[%d] and remote client TOS[%d] mismatch\n", + listener->tos, cm_info->tos); + cm_node->tos = max(listener->tos, cm_info->tos); + cm_node->user_pri = rt_tos2priority(cm_node->tos); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n", + cm_node->tos, cm_node->user_pri); + } memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr)); memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr)); cm_node->loc_port = cm_info->loc_port; @@ -2162,7 +2180,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node( I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = iwdev->mss; + cm_node->tcp_cntxt.mss = iwdev->vsi.mss; cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; @@ -2236,7 +2254,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && - cm_node->apbvt_set && cm_node->iwdev) { + cm_node->apbvt_set) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2861,7 +2879,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /* create a CM connection node */ cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; @@ -2874,7 +2892,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node( cm_node->vlan_id, I40IW_CM_LISTENER_ACTIVE_STATE); if (!loopback_remotelistener) { - i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED); + i40iw_rem_ref_cm_node(cm_node); + return ERR_PTR(-ECONNREFUSED); } else { loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; @@ -2887,7 +2906,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( loopback_remotelistener); if (!loopback_remotenode) { i40iw_rem_ref_cm_node(cm_node); - return NULL; + return ERR_PTR(-ENOMEM); } cm_core->stats_loopbacks++; loopback_remotenode->loopbackpartner = cm_node; @@ -3041,10 +3060,10 @@ static int i40iw_cm_close(struct i40iw_cm_node *cm_node) /** * i40iw_receive_ilq - recv an ETHERNET packet, and process it * through CM - * @dev: FPK dev struct + * @vsi: pointer to the vsi structure * @rbuf: receive buffer */ -void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) +void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf) { struct i40iw_cm_node *cm_node; struct i40iw_cm_listener *listener; @@ -3052,9 +3071,11 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) struct ipv6hdr *ip6h; struct tcphdr *tcph; struct i40iw_cm_info cm_info; + struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; struct i40iw_cm_core *cm_core = &iwdev->cm_core; struct vlan_ethhdr *ethh; + u16 vtag; /* if vlan, then maclen = 18 else 14 */ iph = (struct iphdr *)rbuf->iph; @@ -3068,7 +3089,9 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) ethh = (struct vlan_ethhdr *)rbuf->mem.va; if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) { - cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK; + vtag = ntohs(ethh->h_vlan_TCI); + cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + cm_info.vlan_id = vtag & VLAN_VID_MASK; i40iw_debug(cm_core->dev, I40IW_DEBUG_CM, "%s vlan_id=%d\n", @@ -3083,6 +3106,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) cm_info.loc_addr[0] = ntohl(iph->daddr); cm_info.rem_addr[0] = ntohl(iph->saddr); cm_info.ipv4 = true; + cm_info.tos = iph->tos; } else { ip6h = (struct ipv6hdr *)rbuf->iph; i40iw_copy_ip_ntohl(cm_info.loc_addr, @@ -3090,6 +3114,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf) i40iw_copy_ip_ntohl(cm_info.rem_addr, ip6h->saddr.in6_u.u6_addr32); cm_info.ipv4 = false; + cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4); } cm_info.loc_port = ntohs(tcph->dest); cm_info.rem_port = ntohs(tcph->source); @@ -3309,6 +3334,8 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp, ctx_info->tcp_info_valid = true; ctx_info->iwarp_info_valid = true; + ctx_info->add_to_qoslist = true; + ctx_info->user_pri = cm_node->user_pri; i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp); if (cm_node->snd_mark_en) { @@ -3320,33 +3347,47 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp, cm_node->state = I40IW_CM_STATE_OFFLOADED; tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED; tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx; + tcp_info.tos = cm_node->tos; dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info); /* once tcp_info is set, no need to do it again */ ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = false; + ctx_info->add_to_qoslist = false; } /** * i40iw_cm_disconn - when a connection is being closed * @iwqp: associate qp for the connection */ -int i40iw_cm_disconn(struct i40iw_qp *iwqp) +void i40iw_cm_disconn(struct i40iw_qp *iwqp) { struct disconn_work *work; struct i40iw_device *iwdev = iwqp->iwdev; struct i40iw_cm_core *cm_core = &iwdev->cm_core; + unsigned long flags; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return -ENOMEM; /* Timer will clean up */ - + return; /* Timer will clean up */ + + spin_lock_irqsave(&iwdev->qptable_lock, flags); + if (!iwdev->qp_table[iwqp->ibqp.qp_num]) { + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, + "%s qp_id %d is already freed\n", + __func__, iwqp->ibqp.qp_num); + kfree(work); + return; + } i40iw_add_ref(&iwqp->ibqp); + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); + work->iwqp = iwqp; INIT_WORK(&work->work, i40iw_disconnect_worker); queue_work(cm_core->disconn_wq, &work->work); - return 0; + return; } /** @@ -3432,7 +3473,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp) *terminate-handler to issue cm_disconn which can re-free *a QP even after its refcnt=0. */ - del_timer(&iwqp->terminate_timer); + i40iw_terminate_del_timer(qp); if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; @@ -3462,7 +3503,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp) /* Flush the queues */ i40iw_flush_wqes(iwdev, iwqp); - if (qp->term_flags) { + if (qp->term_flags && iwqp->ibqp.event_handler) { ibevent.device = iwqp->ibqp.device; ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ? IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR; @@ -3571,7 +3612,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_node = (void *)cm_node; cm_node->iwqp = iwqp; - buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN; + buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE; status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1); @@ -3605,18 +3646,10 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->lsmm_mr = ibmr; if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - if (is_remote_ne020_or_chelsio(cm_node)) - dev->iw_priv_qp_ops->qp_send_lsmm( - &iwqp->sc_qp, + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, (accept.size + conn_param->private_data_len), ibmr->lkey); - else - dev->iw_priv_qp_ops->qp_send_lsmm( - &iwqp->sc_qp, - iwqp->ietf_mem.va, - (accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN), - ibmr->lkey); } else { if (iwqp->page) @@ -3714,6 +3747,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in6 *raddr6; bool qhash_set = false; int apbvt_set = 0; + int err = 0; enum i40iw_status_code status; ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); @@ -3759,6 +3793,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } cm_info.cm_id = cm_id; + cm_info.tos = cm_id->tos; + cm_info.user_pri = rt_tos2priority(cm_id->tos); + i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n", + __func__, cm_id->tos, cm_info.user_pri); if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) || (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32, raddr6->sin6_addr.in6_u.u6_addr32, @@ -3790,8 +3828,11 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data_len, (void *)conn_param->private_data, &cm_info); - if (!cm_node) - goto err; + + if (IS_ERR(cm_node)) { + err = PTR_ERR(cm_node); + goto err_out; + } i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && @@ -3805,10 +3846,12 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_id = cm_id; i40iw_add_ref(&iwqp->ibqp); - if (cm_node->state == I40IW_CM_STATE_SYN_SENT) { - if (i40iw_send_syn(cm_node, 0)) { + if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { + cm_node->state = I40IW_CM_STATE_SYN_SENT; + err = i40iw_send_syn(cm_node, 0); + if (err) { i40iw_rem_ref_cm_node(cm_node); - goto err; + goto err_out; } } @@ -3820,24 +3863,25 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->cm_id); return 0; -err: - if (cm_node) { - if (cm_node->ipv4) - i40iw_debug(cm_node->dev, - I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI4", - cm_node->rem_addr); - else - i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI6", - cm_node->rem_addr); - } - i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); +err_out: + if (cm_info.ipv4) + i40iw_debug(&iwdev->sc_dev, + I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI4", + cm_info.rem_addr); + else + i40iw_debug(&iwdev->sc_dev, + I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI6", + cm_info.rem_addr); + + if (qhash_set) + i40iw_manage_qhash(iwdev, + &cm_info, + I40IW_QHASH_TYPE_TCP_ESTABLISHED, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, + false); if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, cm_info.loc_port)) @@ -3846,7 +3890,7 @@ err: I40IW_MANAGE_APBVT_DEL); cm_id->rem_ref(cm_id); iwdev->cm_core.stats_connect_errs++; - return -ENOMEM; + return err; } /** @@ -3904,6 +3948,10 @@ int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_listen_node; + cm_listen_node->tos = cm_id->tos; + cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); + cm_info.user_pri = cm_listen_node->user_pri; + if (!cm_listen_node->reused_node) { if (wildcard) { if (cm_info.ipv4) @@ -4124,3 +4172,158 @@ static void i40iw_cm_post_event(struct i40iw_cm_event *event) queue_work(event->cm_node->cm_core->event_wq, &event->event_work); } + +/** + * i40iw_qhash_ctrl - enable/disable qhash for list + * @iwdev: device pointer + * @parent_listen_node: parent listen node + * @nfo: cm info node + * @ipaddr: Pointer to IPv4 or IPv6 address + * @ipv4: flag indicating IPv4 when true + * @ifup: flag indicating interface up when true + * + * Enables or disables the qhash for the node in the child + * listen list that matches ipaddr. If no matching IP was found + * it will allocate and add a new child listen node to the + * parent listen node. The listen_list_lock is assumed to be + * held when called. + */ +static void i40iw_qhash_ctrl(struct i40iw_device *iwdev, + struct i40iw_cm_listener *parent_listen_node, + struct i40iw_cm_info *nfo, + u32 *ipaddr, bool ipv4, bool ifup) +{ + struct list_head *child_listen_list = &parent_listen_node->child_listen_list; + struct i40iw_cm_listener *child_listen_node; + struct list_head *pos, *tpos; + enum i40iw_status_code ret; + bool node_allocated = false; + enum i40iw_quad_hash_manage_type op = + ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE; + + list_for_each_safe(pos, tpos, child_listen_list) { + child_listen_node = + list_entry(pos, + struct i40iw_cm_listener, + child_listen_list); + if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16)) + goto set_qhash; + } + + /* if not found then add a child listener if interface is going up */ + if (!ifup) + return; + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); + if (!child_listen_node) + return; + node_allocated = true; + memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node)); + + memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16); + +set_qhash: + memcpy(nfo->loc_addr, + child_listen_node->loc_addr, + sizeof(nfo->loc_addr)); + nfo->vlan_id = child_listen_node->vlan_id; + ret = i40iw_manage_qhash(iwdev, nfo, + I40IW_QHASH_TYPE_TCP_SYN, + op, + NULL, false); + if (!ret) { + child_listen_node->qhash_set = ifup; + if (node_allocated) + list_add(&child_listen_node->child_listen_list, + &parent_listen_node->child_listen_list); + } else if (node_allocated) { + kfree(child_listen_node); + } +} + +/** + * i40iw_cm_disconnect_all - disconnect all connected qp's + * @iwdev: device pointer + */ +void i40iw_cm_disconnect_all(struct i40iw_device *iwdev) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + struct list_head *list_core_temp; + struct list_head *list_node; + struct i40iw_cm_node *cm_node; + unsigned long flags; + struct list_head connected_list; + struct ib_qp_attr attr; + + INIT_LIST_HEAD(&connected_list); + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + cm_node = container_of(list_node, struct i40iw_cm_node, list); + atomic_inc(&cm_node->ref_count); + list_add(&cm_node->connected_entry, &connected_list); + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &connected_list) { + cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_rem_ref_cm_node(cm_node); + } +} + +/** + * i40iw_ifdown_notify - process an ifdown on an interface + * @iwdev: device pointer + * @ipaddr: Pointer to IPv4 or IPv6 address + * @ipv4: flag indicating IPv4 when true + * @ifup: flag indicating interface up when true + */ +void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, + u32 *ipaddr, bool ipv4, bool ifup) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + unsigned long flags; + struct i40iw_cm_listener *listen_node; + static const u32 ip_zero[4] = { 0, 0, 0, 0 }; + struct i40iw_cm_info nfo; + u16 vlan_id = rdma_vlan_dev_vlan_id(netdev); + enum i40iw_status_code ret; + enum i40iw_quad_hash_manage_type op = + ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE; + + /* Disable or enable qhash for listeners */ + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { + if (vlan_id == listen_node->vlan_id && + (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) || + !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) { + memcpy(nfo.loc_addr, listen_node->loc_addr, + sizeof(nfo.loc_addr)); + nfo.loc_port = listen_node->loc_port; + nfo.ipv4 = listen_node->ipv4; + nfo.vlan_id = listen_node->vlan_id; + nfo.user_pri = listen_node->user_pri; + if (!list_empty(&listen_node->child_listen_list)) { + i40iw_qhash_ctrl(iwdev, + listen_node, + &nfo, + ipaddr, ipv4, ifup); + } else if (memcmp(listen_node->loc_addr, ip_zero, + ipv4 ? 4 : 16)) { + ret = i40iw_manage_qhash(iwdev, + &nfo, + I40IW_QHASH_TYPE_TCP_SYN, + op, + NULL, + false); + if (!ret) + listen_node->qhash_set = ifup; + } + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + /* disconnect any connected qp's on ifdown */ + if (!ifup) + i40iw_cm_disconnect_all(iwdev); +} diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index e9046d9f9645..2e52e38ffcf3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -56,8 +56,6 @@ #define I40IW_MAX_IETF_SIZE 32 -#define MPA_ZERO_PAD_LEN 4 - /* IETF RTR MSG Fields */ #define IETF_PEER_TO_PEER 0x8000 #define IETF_FLPDU_ZERO_LEN 0x4000 @@ -299,6 +297,7 @@ struct i40iw_cm_listener { enum i40iw_cm_listener_state listener_state; u32 reused_node; u8 user_pri; + u8 tos; u16 vlan_id; bool qhash_set; bool ipv4; @@ -341,9 +340,11 @@ struct i40iw_cm_node { int accept_pend; struct list_head timer_entry; struct list_head reset_entry; + struct list_head connected_entry; atomic_t passive_state; bool qhash_set; u8 user_pri; + u8 tos; bool ipv4; bool snd_mark_en; u16 lsmm_size; @@ -368,7 +369,8 @@ struct i40iw_cm_info { u32 rem_addr[4]; u16 vlan_id; int backlog; - u16 user_pri; + u8 user_pri; + u8 tos; bool ipv4; }; @@ -445,4 +447,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev, u8 *mac_addr, u32 action); +void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, + u32 *ipaddr, bool ipv4, bool ifup); +void i40iw_cm_disconnect_all(struct i40iw_device *iwdev); #endif /* I40IW_CM_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 2c4b4d072d6a..392f78384a60 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -103,6 +103,7 @@ static enum i40iw_status_code i40iw_cqp_poll_registers( if (newtail != tail) { /* SUCCESS */ I40IW_RING_MOVE_TAIL(cqp->sq_ring); + cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++; return 0; } udelay(I40IW_SLEEP_COUNT); @@ -223,6 +224,136 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( } /** + * i40iw_fill_qos_list - Change all unknown qs handles to available ones + * @qs_list: list of qs_handles to be fixed with valid qs_handles + */ +static void i40iw_fill_qos_list(u16 *qs_list) +{ + u16 qshandle = qs_list[0]; + int i; + + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + if (qs_list[i] == QS_HANDLE_UNKNOWN) + qs_list[i] = qshandle; + else + qshandle = qs_list[i]; + } +} + +/** + * i40iw_qp_from_entry - Given entry, get to the qp structure + * @entry: Points to list of qp structure + */ +static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry) +{ + if (!entry) + return NULL; + + return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list)); +} + +/** + * i40iw_get_qp - get the next qp from the list given current qp + * @head: Listhead of qp's + * @qp: current qp + */ +static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp) +{ + struct list_head *entry = NULL; + struct list_head *lastentry; + + if (list_empty(head)) + return NULL; + + if (!qp) { + entry = head->next; + } else { + lastentry = &qp->list; + entry = (lastentry != head) ? lastentry->next : NULL; + } + + return i40iw_qp_from_entry(entry); +} + +/** + * i40iw_change_l2params - given the new l2 parameters, change all qp + * @vsi: pointer to the vsi structure + * @l2params: New paramaters from l2 + */ +void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params) +{ + struct i40iw_sc_dev *dev = vsi->dev; + struct i40iw_sc_qp *qp = NULL; + bool qs_handle_change = false; + bool mss_change = false; + unsigned long flags; + u16 qs_handle; + int i; + + if (vsi->mss != l2params->mss) { + mss_change = true; + vsi->mss = l2params->mss; + } + + i40iw_fill_qos_list(l2params->qs_handle_list); + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + qs_handle = l2params->qs_handle_list[i]; + if (vsi->qos[i].qs_handle != qs_handle) + qs_handle_change = true; + else if (!mss_change) + continue; /* no MSS nor qs handle change */ + spin_lock_irqsave(&vsi->qos[i].lock, flags); + qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); + while (qp) { + if (mss_change) + i40iw_qp_mss_modify(dev, qp); + if (qs_handle_change) { + qp->qs_handle = qs_handle; + /* issue cqp suspend command */ + i40iw_qp_suspend_resume(dev, qp, true); + } + qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); + } + spin_unlock_irqrestore(&vsi->qos[i].lock, flags); + vsi->qos[i].qs_handle = qs_handle; + } +} + +/** + * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp + * @qp: qp to be removed from qos + */ +static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp) +{ + struct i40iw_sc_vsi *vsi = qp->vsi; + unsigned long flags; + + if (!qp->on_qoslist) + return; + spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags); + list_del(&qp->list); + spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags); +} + +/** + * i40iw_qp_add_qos - called during setctx fot qp to be added to qos + * @qp: qp to be added to qos + */ +void i40iw_qp_add_qos(struct i40iw_sc_qp *qp) +{ + struct i40iw_sc_vsi *vsi = qp->vsi; + unsigned long flags; + + if (qp->on_qoslist) + return; + spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags); + qp->qs_handle = vsi->qos[qp->user_pri].qs_handle; + list_add(&qp->list, &vsi->qos[qp->user_pri].qplist); + qp->on_qoslist = true; + spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags); +} + +/** * i40iw_sc_pd_init - initialize sc pd struct * @dev: sc device struct * @pd: sc pd ptr @@ -292,6 +423,9 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, info->dev->cqp = cqp; I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size); + cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0; + cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0; + i40iw_debug(cqp->dev, I40IW_DEBUG_WQE, "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n", __func__, cqp->sq_size, cqp->hw_sq_size, @@ -302,12 +436,10 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, /** * i40iw_sc_cqp_create - create cqp during bringup * @cqp: struct for cqp hw - * @disable_pfpdus: if pfpdu to be disabled * @maj_err: If error, major err number * @min_err: If error, minor err number */ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp, - bool disable_pfpdus, u16 *maj_err, u16 *min_err) { @@ -326,9 +458,6 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp, temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) | LS_64(cqp->struct_ver, I40IW_CQPHC_SVER); - if (disable_pfpdus) - temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS); - set_64bit_val(cqp->host_ctx, 0, temp); set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa); temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) | @@ -424,6 +553,7 @@ u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch) return NULL; } I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code); + cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++; if (ret_code) return NULL; if (!wqe_idx) @@ -559,6 +689,8 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring)); wmb(); /* write shadow area before tail */ I40IW_RING_MOVE_TAIL(cqp->sq_ring); + ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++; + return ret_code; } @@ -1051,6 +1183,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry( u64 qw1 = 0; u64 qw2 = 0; u64 temp; + struct i40iw_sc_vsi *vsi = info->vsi; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -1082,7 +1215,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry( LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) | LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3)); } - qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE); + qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE); if (info->vlan_valid) qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID); set_64bit_val(wqe, 16, qw2); @@ -2103,6 +2236,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp, u32 offset; qp->dev = info->pd->dev; + qp->vsi = info->vsi; qp->sq_pa = info->sq_pa; qp->rq_pa = info->rq_pa; qp->hw_host_ctx_pa = info->host_ctx_pa; @@ -2151,7 +2285,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp, qp->rq_tph_en = info->rq_tph_en; qp->rcv_tph_en = info->rcv_tph_en; qp->xmit_tph_en = info->xmit_tph_en; - qp->qs_handle = qp->pd->dev->qs_handle; + qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; qp->exception_lan_queue = qp->pd->dev->exception_lan_queue; return 0; @@ -2296,6 +2430,7 @@ static enum i40iw_status_code i40iw_sc_qp_destroy( struct i40iw_sc_cqp *cqp; u64 header; + i40iw_qp_rem_qos(qp); cqp = qp->pd->dev->cqp; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -2443,10 +2578,20 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( { struct i40iwarp_offload_info *iw; struct i40iw_tcp_offload_info *tcp; + struct i40iw_sc_vsi *vsi; + struct i40iw_sc_dev *dev; u64 qw0, qw3, qw7 = 0; iw = info->iwarp_info; tcp = info->tcp_info; + vsi = qp->vsi; + dev = qp->dev; + if (info->add_to_qoslist) { + qp->user_pri = info->user_pri; + i40iw_qp_add_qos(qp); + i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n", + __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle); + } qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) | LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) | LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | @@ -2487,16 +2632,14 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER); qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX); - set_64bit_val(qp_ctx, 144, qp->q2_pa); + set_64bit_val(qp_ctx, + 144, + LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) | + LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX)); set_64bit_val(qp_ctx, 152, LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT)); - /* - * Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an - *advertisable IRD of 64 - */ - iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD; set_64bit_val(qp_ctx, 160, LS_64(iw->ord_size, I40IWQPC_ORDSIZE) | @@ -2507,6 +2650,9 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(iw->bind_en, I40IWQPC_BINDEN) | LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) | LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) | + LS_64((((vsi->stats_fcn_id_alloc) && + (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0), + I40IWQPC_USESTATSINSTANCE) | LS_64(1, I40IWQPC_IWARPMODE) | LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) | LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) | @@ -2623,7 +2769,9 @@ static enum i40iw_status_code i40iw_sc_alloc_stag( u64 *wqe; struct i40iw_sc_cqp *cqp; u64 header; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; cqp = dev->cqp; wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) @@ -2643,7 +2791,7 @@ static enum i40iw_status_code i40iw_sc_alloc_stag( LS_64(1, I40IW_CQPSQ_STAG_MR) | LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) | LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) | - LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | + LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) | LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) | LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) | @@ -2679,7 +2827,9 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared( u32 pble_obj_cnt; bool remote_access; u8 addr_type; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY | I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY)) remote_access = true; @@ -2722,7 +2872,7 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared( header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) | LS_64(1, I40IW_CQPSQ_STAG_MR) | LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) | - LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | + LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) | LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) | LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) | LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) | @@ -2937,7 +3087,9 @@ enum i40iw_status_code i40iw_sc_mr_fast_register( u64 temp, header; u64 *wqe; u32 wqe_idx; + enum i40iw_page_size page_size; + page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K; wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); if (!wqe) @@ -2964,7 +3116,7 @@ enum i40iw_status_code i40iw_sc_mr_fast_register( LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) | LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) | LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) | - LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) | + LS_64(page_size, I40IWQPSQ_HPAGESIZE) | LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) | LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) | LS_64(info->read_fence, I40IWQPSQ_READFENCE) | @@ -3959,7 +4111,7 @@ enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev, struct cqp_commands_info *pcmdinfo) { enum i40iw_status_code status = 0; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) @@ -3978,7 +4130,7 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev) { enum i40iw_status_code status = 0; struct cqp_commands_info *pcmdinfo; - unsigned long flags; + unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) { @@ -4055,7 +4207,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, u16 ddp_seg_len; int copy_len = 0; u8 is_tagged = 0; - enum i40iw_flush_opcode flush_code = FLUSH_INVALID; u32 opcode; struct i40iw_terminate_hdr *termhdr; @@ -4228,9 +4379,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, if (copy_len) memcpy(termhdr + 1, pkt, copy_len); - if (flush_code && !info->in_rdrsp_wr) - qp->sq_flush = (info->sq) ? true : false; - return sizeof(struct i40iw_terminate_hdr) + copy_len; } @@ -4321,286 +4469,370 @@ void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *in } /** - * i40iw_hw_stat_init - Initiliaze HW stats table - * @devstat: pestat struct + * i40iw_sc_vsi_init - Initialize virtual device + * @vsi: pointer to the vsi structure + * @info: parameters to initialize vsi + **/ +void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info) +{ + int i; + + vsi->dev = info->dev; + vsi->back_vsi = info->back_vsi; + vsi->mss = info->params->mss; + i40iw_fill_qos_list(info->params->qs_handle_list); + + for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { + vsi->qos[i].qs_handle = + info->params->qs_handle_list[i]; + i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle); + spin_lock_init(&vsi->qos[i].lock); + INIT_LIST_HEAD(&vsi->qos[i].qplist); + } +} + +/** + * i40iw_hw_stats_init - Initiliaze HW stats table + * @stats: pestat struct * @fcn_idx: PCI fn id - * @hw: PF i40iw_hw structure. * @is_pf: Is it a PF? * - * Populate the HW stat table with register offset addr for each - * stat. And start the perioidic stats timer. + * Populate the HW stats table with register offset addr for each + * stats. And start the perioidic stats timer. */ -static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat, - u8 fcn_idx, - struct i40iw_hw *hw, bool is_pf) +void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) { - u32 stat_reg_offset; - u32 stat_index; - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - - devstat->hw = hw; + u32 stats_reg_offset; + u32 stats_index; + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; if (is_pf) { - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = I40E_GLPES_PFIP4RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = I40E_GLPES_PFIP4RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = I40E_GLPES_PFIP4TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = I40E_GLPES_PFIP6RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = I40E_GLPES_PFIP6RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = I40E_GLPES_PFIP6TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = I40E_GLPES_PFTCPRTXSEG(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = I40E_GLPES_PFTCPRXOPTERR(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = I40E_GLPES_PFTCPRXPROTOERR(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = I40E_GLPES_PFIP4RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = I40E_GLPES_PFIP4RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = I40E_GLPES_PFIP4TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = I40E_GLPES_PFIP4TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = I40E_GLPES_PFIP6RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = I40E_GLPES_PFIP6RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = I40E_GLPES_PFIP6TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_PFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_PFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = I40E_GLPES_PFTCPRXSEGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = I40E_GLPES_PFTCPTXSEGLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = I40E_GLPES_PFRDMARXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = I40E_GLPES_PFRDMARXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = I40E_GLPES_PFRDMARXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = I40E_GLPES_PFRDMATXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = I40E_GLPES_PFRDMATXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = I40E_GLPES_PFRDMATXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = I40E_GLPES_PFRDMAVBNDLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = I40E_GLPES_PFRDMAVINVLO(fcn_idx); } else { - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = I40E_GLPES_VFIP4RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = I40E_GLPES_VFIP4RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = I40E_GLPES_VFIP4TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = I40E_GLPES_VFIP6RXDISCARD(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = I40E_GLPES_VFIP6RXTRUNC(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = I40E_GLPES_VFIP6TXNOROUTE(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] = I40E_GLPES_VFTCPRTXSEG(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = I40E_GLPES_VFTCPRXOPTERR(fcn_idx); - stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = + stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = I40E_GLPES_VFTCPRXPROTOERR(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] = I40E_GLPES_VFIP4RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] = I40E_GLPES_VFIP4RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] = I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] = I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] = I40E_GLPES_VFIP4TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] = I40E_GLPES_VFIP4TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] = I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] = I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] = I40E_GLPES_VFIP6RXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] = I40E_GLPES_VFIP6RXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] = I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] = I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] = I40E_GLPES_VFIP6TXOCTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_VFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] = I40E_GLPES_VFIP6TXPKTSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] = I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] = I40E_GLPES_VFTCPRXSEGSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] = I40E_GLPES_VFTCPTXSEGLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] = I40E_GLPES_VFRDMARXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] = I40E_GLPES_VFRDMARXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] = I40E_GLPES_VFRDMARXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] = I40E_GLPES_VFRDMATXRDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] = I40E_GLPES_VFRDMATXSNDSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] = I40E_GLPES_VFRDMATXWRSLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] = I40E_GLPES_VFRDMAVBNDLO(fcn_idx); - stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = + stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] = I40E_GLPES_VFRDMAVINVLO(fcn_idx); } - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) { - stat_reg_offset = stat_table->stat_offset_64[stat_index]; - last_rd_stats->stat_value_64[stat_index] = - readq(devstat->hw->hw_addr + stat_reg_offset); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) { + stats_reg_offset = stats_table->stats_offset_64[stats_index]; + last_rd_stats->stats_value_64[stats_index] = + readq(stats->hw->hw_addr + stats_reg_offset); } - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) { - stat_reg_offset = stat_table->stat_offset_32[stat_index]; - last_rd_stats->stat_value_32[stat_index] = - i40iw_rd32(devstat->hw, stat_reg_offset); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) { + stats_reg_offset = stats_table->stats_offset_32[stats_index]; + last_rd_stats->stats_value_32[stats_index] = + i40iw_rd32(stats->hw, stats_reg_offset); } } /** - * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs. - * @devstat: pestat struct - * @index: index in HW stat table which contains offset reg-addr - * @value: hw stat value + * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. + * @stat: pestat struct + * @index: index in HW stats table which contains offset reg-addr + * @value: hw stats value */ -static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat, - enum i40iw_hw_stat_index_32b index, - u64 *value) +void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_32b index, + u64 *value) { - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - u64 new_stat_value = 0; - u32 stat_reg_offset = stat_table->stat_offset_32[index]; - - new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset); + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; + struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats; + u64 new_stats_value = 0; + u32 stats_reg_offset = stats_table->stats_offset_32[index]; + + new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset); /*roll-over case */ - if (new_stat_value < last_rd_stats->stat_value_32[index]) - hw_stats->stat_value_32[index] += new_stat_value; + if (new_stats_value < last_rd_stats->stats_value_32[index]) + hw_stats->stats_value_32[index] += new_stats_value; else - hw_stats->stat_value_32[index] += - new_stat_value - last_rd_stats->stat_value_32[index]; - last_rd_stats->stat_value_32[index] = new_stat_value; - *value = hw_stats->stat_value_32[index]; + hw_stats->stats_value_32[index] += + new_stats_value - last_rd_stats->stats_value_32[index]; + last_rd_stats->stats_value_32[index] = new_stats_value; + *value = hw_stats->stats_value_32[index]; } /** - * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs. - * @devstat: pestat struct - * @index: index in HW stat table which contains offset reg-addr - * @value: hw stat value + * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodates for roll-overs. + * @stats: pestat struct + * @index: index in HW stats table which contains offset reg-addr + * @value: hw stats value */ -static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat, - enum i40iw_hw_stat_index_64b index, - u64 *value) +void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_64b index, + u64 *value) { - struct i40iw_dev_hw_stat_offsets *stat_table = - &devstat->hw_stat_offsets; - struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats; - struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - u64 new_stat_value = 0; - u32 stat_reg_offset = stat_table->stat_offset_64[index]; - - new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset); + struct i40iw_dev_hw_stats_offsets *stats_table = + &stats->hw_stats_offsets; + struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats; + struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats; + u64 new_stats_value = 0; + u32 stats_reg_offset = stats_table->stats_offset_64[index]; + + new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset); /*roll-over case */ - if (new_stat_value < last_rd_stats->stat_value_64[index]) - hw_stats->stat_value_64[index] += new_stat_value; + if (new_stats_value < last_rd_stats->stats_value_64[index]) + hw_stats->stats_value_64[index] += new_stats_value; else - hw_stats->stat_value_64[index] += - new_stat_value - last_rd_stats->stat_value_64[index]; - last_rd_stats->stat_value_64[index] = new_stat_value; - *value = hw_stats->stat_value_64[index]; + hw_stats->stats_value_64[index] += + new_stats_value - last_rd_stats->stats_value_64[index]; + last_rd_stats->stats_value_64[index] = new_stats_value; + *value = hw_stats->stats_value_64[index]; } /** - * i40iw_hw_stat_read_all - read all HW stat counters - * @devstat: pestat struct - * @stat_values: hw stats structure + * i40iw_hw_stats_read_all - read all HW stat counters + * @stats: pestat struct + * @stats_values: hw stats structure * * Read all the HW stat counters and populates hw_stats structure - * of passed-in dev's pestat as well as copy created in stat_values. + * of passed-in vsi's pestat as well as copy created in stat_values. */ -static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat, - struct i40iw_dev_hw_stats *stat_values) +void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, + struct i40iw_dev_hw_stats *stats_values) { - u32 stat_index; - - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) - i40iw_hw_stat_read_32(devstat, stat_index, - &stat_values->stat_value_32[stat_index]); - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) - i40iw_hw_stat_read_64(devstat, stat_index, - &stat_values->stat_value_64[stat_index]); + u32 stats_index; + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) + i40iw_hw_stats_read_32(stats, stats_index, + &stats_values->stats_value_32[stats_index]); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) + i40iw_hw_stats_read_64(stats, stats_index, + &stats_values->stats_value_64[stats_index]); + spin_unlock_irqrestore(&stats->lock, flags); } /** - * i40iw_hw_stat_refresh_all - Update all HW stat structs - * @devstat: pestat struct - * @stat_values: hw stats structure + * i40iw_hw_stats_refresh_all - Update all HW stats structs + * @stats: pestat struct * - * Read all the HW stat counters to refresh values in hw_stats structure + * Read all the HW stats counters to refresh values in hw_stats structure * of passed-in dev's pestat */ -static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat) +void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats) +{ + u64 stats_value; + u32 stats_index; + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32; + stats_index++) + i40iw_hw_stats_read_32(stats, stats_index, &stats_value); + for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64; + stats_index++) + i40iw_hw_stats_read_64(stats, stats_index, &stats_value); + spin_unlock_irqrestore(&stats->lock, flags); +} + +/** + * i40iw_get_fcn_id - Return the function id + * @dev: pointer to the device + */ +static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev) +{ + u8 fcn_id = I40IW_INVALID_FCN_ID; + u8 i; + + for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++) + if (!dev->fcn_id_array[i]) { + fcn_id = i; + dev->fcn_id_array[i] = true; + break; + } + return fcn_id; +} + +/** + * i40iw_vsi_stats_init - Initialize the vsi statistics + * @vsi: pointer to the vsi structure + * @info: The info structure used for initialization + */ +enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info) { - u64 stat_value; - u32 stat_index; - - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32; - stat_index++) - i40iw_hw_stat_read_32(devstat, stat_index, &stat_value); - for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64; - stat_index++) - i40iw_hw_stat_read_64(devstat, stat_index, &stat_value); + u8 fcn_id = info->fcn_id; + + if (info->alloc_fcn_id) + fcn_id = i40iw_get_fcn_id(vsi->dev); + + if (fcn_id == I40IW_INVALID_FCN_ID) + return I40IW_ERR_NOT_READY; + + vsi->pestat = info->pestat; + vsi->pestat->hw = vsi->dev->hw; + + if (info->stats_initialize) { + i40iw_hw_stats_init(vsi->pestat, fcn_id, true); + spin_lock_init(&vsi->pestat->lock); + i40iw_hw_stats_start_timer(vsi); + } + vsi->stats_fcn_id_alloc = info->alloc_fcn_id; + vsi->fcn_id = fcn_id; + return I40IW_SUCCESS; +} + +/** + * i40iw_vsi_stats_free - Free the vsi stats + * @vsi: pointer to the vsi structure + */ +void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) +{ + u8 fcn_id = vsi->fcn_id; + + if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID)) + vsi->dev->fcn_id_array[fcn_id] = false; + i40iw_hw_stats_stop_timer(vsi); } static struct i40iw_cqp_ops iw_cqp_ops = { @@ -4711,24 +4943,6 @@ static struct i40iw_hmc_ops iw_hmc_ops = { NULL }; -static const struct i40iw_device_pestat_ops iw_device_pestat_ops = { - i40iw_hw_stat_init, - i40iw_hw_stat_read_32, - i40iw_hw_stat_read_64, - i40iw_hw_stat_read_all, - i40iw_hw_stat_refresh_all -}; - -/** - * i40iw_device_init_pestat - Initialize the pestat structure - * @dev: pestat struct - */ -enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat) -{ - devstat->ops = iw_device_pestat_ops; - return 0; -} - /** * i40iw_device_init - Initialize IWARP device * @dev: IWARP device pointer @@ -4750,14 +4964,7 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, dev->debug_mask = info->debug_mask; - ret_code = i40iw_device_init_pestat(&dev->dev_pestat); - if (ret_code) { - i40iw_debug(dev, I40IW_DEBUG_DEV, - "%s: i40iw_device_init_pestat failed\n", __func__); - return ret_code; - } dev->hmc_fn_id = info->hmc_fn_id; - dev->qs_handle = info->qs_handle; dev->exception_lan_queue = info->exception_lan_queue; dev->is_pf = info->is_pf; @@ -4770,15 +4977,10 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, dev->hw = info->hw; dev->hw->hw_addr = info->bar0; - val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID); - dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID); - if (dev->is_pf) { - dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat, - dev->hmc_fn_id, dev->hw, true); - spin_lock_init(&dev->dev_pestat.stats_lock); - /*start the periodic stats_timer */ - i40iw_hw_stats_start_timer(dev); + val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID); + dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID); + val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL); db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE); if ((db_size != I40IW_PE_DB_SIZE_4M) && diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 2fac1db0e0a0..a39ac12b6a7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -35,6 +35,8 @@ #ifndef I40IW_D_H #define I40IW_D_H +#define I40IW_FIRST_USER_QP_ID 2 + #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) @@ -67,6 +69,9 @@ #define I40IW_STAG_TYPE_NONSHARED 1 #define I40IW_MAX_USER_PRIORITY 8 +#define I40IW_MAX_STATS_COUNT 16 +#define I40IW_FIRST_NON_PF_STAT 4 + #define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits) #define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits) @@ -74,6 +79,8 @@ #define RS_32_1(val, bits) (u32)(val >> bits) #define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF)) +#define QS_HANDLE_UNKNOWN 0xffff + #define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK)) #define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT) @@ -1199,8 +1206,11 @@ #define I40IWQPC_RXCQNUM_SHIFT 32 #define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT) -#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT -#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK +#define I40IWQPC_STAT_INDEX_SHIFT 0 +#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT) + +#define I40IWQPC_Q2ADDR_SHIFT 0 +#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT) #define I40IWQPC_LASTBYTESENT_SHIFT 0 #define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT) @@ -1232,11 +1242,8 @@ #define I40IWQPC_PRIVEN_SHIFT 25 #define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT) -#define I40IWQPC_LSMMPRESENT_SHIFT 26 -#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT) - -#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27 -#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT) +#define I40IWQPC_USESTATSINSTANCE_SHIFT 26 +#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT) #define I40IWQPC_IWARPMODE_SHIFT 28 #define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT) @@ -1713,6 +1720,8 @@ enum i40iw_alignment { #define OP_MANAGE_VF_PBLE_BP 28 #define OP_QUERY_FPM_VALUES 29 #define OP_COMMIT_FPM_VALUES 30 -#define OP_SIZE_CQP_STAT_ARRAY 31 +#define OP_REQUESTED_COMMANDS 31 +#define OP_COMPLETED_COMMANDS 32 +#define OP_SIZE_CQP_STAT_ARRAY 33 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 0c92a40b3e86..476867a3f584 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -62,7 +62,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev) max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt; arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt; iwdev->max_cqe = 0xFFFFF; - num_pds = max_qp * 4; + num_pds = I40IW_MAX_PDS; resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size; resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp); resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr); @@ -308,7 +308,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) iwqp = iwdev->qp_table[info->qp_cq_id]; if (!iwqp) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id); + i40iw_debug(dev, I40IW_DEBUG_AEQ, + "%s qp_id %d is already freed\n", + __func__, info->qp_cq_id); continue; } i40iw_add_ref(&iwqp->ibqp); @@ -359,6 +361,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) continue; i40iw_cm_disconn(iwqp); break; + case I40IW_AE_QP_SUSPEND_COMPLETE: + i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false); + break; case I40IW_AE_TERMINATE_SENT: i40iw_terminate_send_fin(qp); break; @@ -404,19 +409,18 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) case I40IW_AE_LCE_CQ_CATASTROPHIC: case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG: case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH: - case I40IW_AE_QP_SUSPEND_COMPLETE: ctx_info->err_rq_idx_valid = false; default: - if (!info->sq && ctx_info->err_rq_idx_valid) { - ctx_info->err_rq_idx = info->wqe_idx; - ctx_info->tcp_info_valid = false; - ctx_info->iwarp_info_valid = false; - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, - iwqp->host_ctx.va, - ctx_info); - } - i40iw_terminate_connection(qp, info); - break; + if (!info->sq && ctx_info->err_rq_idx_valid) { + ctx_info->err_rq_idx = info->wqe_idx; + ctx_info->tcp_info_valid = false; + ctx_info->iwarp_info_valid = false; + ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, + iwqp->host_ctx.va, + ctx_info); + } + i40iw_terminate_connection(qp, info); + break; } if (info->qp) i40iw_rem_ref(&iwqp->ibqp); @@ -538,6 +542,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, { struct i40iw_qhash_table_info *info; struct i40iw_sc_dev *dev = &iwdev->sc_dev; + struct i40iw_sc_vsi *vsi = &iwdev->vsi; enum i40iw_status_code status; struct i40iw_cqp *iwcqp = &iwdev->cqp; struct i40iw_cqp_request *cqp_request; @@ -550,6 +555,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, info = &cqp_info->in.u.manage_qhash_table_entry.info; memset(info, 0, sizeof(*info)); + info->vsi = &iwdev->vsi; info->manage = mtype; info->entry_type = etype; if (cminfo->vlan_id != 0xFFFF) { @@ -560,8 +566,9 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, } info->ipv4_valid = cminfo->ipv4; + info->user_pri = cminfo->user_pri; ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr); - info->qp_num = cpu_to_le32(dev->ilq->qp_id); + info->qp_num = cpu_to_le32(vsi->ilq->qp_id); info->dest_port = cpu_to_le16(cminfo->loc_port); info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]); info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]); @@ -617,6 +624,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp_flush_info *hw_info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; + struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait); if (!cqp_request) @@ -631,9 +639,30 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, cqp_info->in.u.qp_flush_wqes.qp = qp; cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request; status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (status) + if (status) { i40iw_pr_err("CQP-OP Flush WQE's fail"); - return status; + complete(&iwqp->sq_drained); + complete(&iwqp->rq_drained); + return status; + } + if (!cqp_request->compl_info.maj_err_code) { + switch (cqp_request->compl_info.min_err_code) { + case I40IW_CQP_COMPL_RQ_WQE_FLUSHED: + complete(&iwqp->sq_drained); + break; + case I40IW_CQP_COMPL_SQ_WQE_FLUSHED: + complete(&iwqp->rq_drained); + break; + case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED: + break; + default: + complete(&iwqp->sq_drained); + complete(&iwqp->rq_drained); + break; + } + } + + return 0; } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index ac2f3cd9478c..2728af3103ce 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -237,14 +237,11 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) */ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) { - enum i40iw_status_code status = 0; struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_cqp *cqp = &iwdev->cqp; - if (free_hwcqp && dev->cqp_ops->cqp_destroy) - status = dev->cqp_ops->cqp_destroy(dev->cqp); - if (status) - i40iw_pr_err("destroy cqp failed"); + if (free_hwcqp) + dev->cqp_ops->cqp_destroy(dev->cqp); i40iw_free_dma_mem(dev->hw, &cqp->sq); kfree(cqp->scratch_array); @@ -270,6 +267,7 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); else i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); + irq_set_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); } @@ -603,7 +601,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev) i40iw_pr_err("cqp init status %d\n", status); goto exit; } - status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err); + status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err); if (status) { i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n", status, maj_err, min_err); @@ -688,6 +686,7 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw struct i40iw_msix_vector *msix_vec) { enum i40iw_status_code status; + cpumask_t mask; if (iwdev->msix_shared && !ceq_id) { tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); @@ -697,12 +696,15 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } + cpumask_clear(&mask); + cpumask_set_cpu(msix_vec->cpu_affinity, &mask); + irq_set_affinity_hint(msix_vec->irq, &mask); + if (status) { i40iw_pr_err("ceq irq config fail\n"); return I40IW_ERR_CONFIG; } msix_vec->ceq_id = ceq_id; - msix_vec->cpu_affinity = 0; return 0; } @@ -930,6 +932,7 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev) struct i40iw_puda_rsrc_info info; enum i40iw_status_code status; + memset(&info, 0, sizeof(info)); info.type = I40IW_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; info.qp_id = 0; @@ -939,10 +942,9 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev) info.rq_size = 8192; info.buf_size = 1024; info.tx_buf_cnt = 16384; - info.mss = iwdev->mss; info.receive = i40iw_receive_ilq; info.xmit_complete = i40iw_free_sqbuf; - status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info); + status = i40iw_puda_create_rsrc(&iwdev->vsi, &info); if (status) i40iw_pr_err("ilq create fail\n"); return status; @@ -959,6 +961,7 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) struct i40iw_puda_rsrc_info info; enum i40iw_status_code status; + memset(&info, 0, sizeof(info)); info.type = I40IW_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; info.qp_id = iwdev->sc_dev.exception_lan_queue; @@ -967,9 +970,8 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) info.sq_size = 8192; info.rq_size = 8192; info.buf_size = 2048; - info.mss = iwdev->mss; info.tx_buf_cnt = 16384; - status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info); + status = i40iw_puda_create_rsrc(&iwdev->vsi, &info); if (status) i40iw_pr_err("ieq create fail\n"); return status; @@ -1159,7 +1161,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev) { struct net_device *ip_dev; struct inet6_dev *idev; - struct inet6_ifaddr *ifp; + struct inet6_ifaddr *ifp, *tmp; u32 local_ipaddr6[4]; rcu_read_lock(); @@ -1172,7 +1174,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev) i40iw_pr_err("ipv6 inet device not found\n"); break; } - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr, rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr); i40iw_copy_ip_ntohl(local_ipaddr6, @@ -1294,17 +1296,23 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, enum i40iw_status_code status; struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_device_init_info info; + struct i40iw_vsi_init_info vsi_info; struct i40iw_dma_mem mem; + struct i40iw_l2params l2params; u32 size; + struct i40iw_vsi_stats_info stats_info; + u16 last_qset = I40IW_NO_QSET; + u16 qset; + u32 i; + memset(&l2params, 0, sizeof(l2params)); memset(&info, 0, sizeof(info)); size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) + (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX); iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL); - if (!iwdev->hmc_info_mem) { - i40iw_pr_err("memory alloc fail\n"); + if (!iwdev->hmc_info_mem) return I40IW_ERR_NO_MEMORY; - } + iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem; dev->hmc_info = &iwdev->hw.hmc; dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1); @@ -1325,7 +1333,17 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, info.bar0 = ldev->hw_addr; info.hw = &iwdev->hw; info.debug_mask = debug; - info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle; + l2params.mss = + (ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS; + for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) { + qset = ldev->params.qos.prio_qos[i].qs_handle; + l2params.qs_handle_list[i] = qset; + if (last_qset == I40IW_NO_QSET) + last_qset = qset; + else if ((qset != last_qset) && (qset != I40IW_NO_QSET)) + iwdev->dcb = true; + } + i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb); info.exception_lan_queue = 1; info.vchnl_send = i40iw_virtchnl_send; status = i40iw_device_init(&iwdev->sc_dev, &info); @@ -1334,6 +1352,20 @@ exit: kfree(iwdev->hmc_info_mem); iwdev->hmc_info_mem = NULL; } + memset(&vsi_info, 0, sizeof(vsi_info)); + vsi_info.dev = &iwdev->sc_dev; + vsi_info.back_vsi = (void *)iwdev; + vsi_info.params = &l2params; + i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info); + + if (dev->is_pf) { + memset(&stats_info, 0, sizeof(stats_info)); + stats_info.fcn_id = ldev->fid; + stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); + stats_info.stats_initialize = true; + if (stats_info.pestat) + i40iw_vsi_stats_init(&iwdev->vsi, &stats_info); + } return status; } @@ -1384,6 +1416,7 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) { iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry; iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector; + iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx; if (i == 0) { iw_qvinfo->aeq_idx = 0; if (iwdev->msix_shared) @@ -1404,18 +1437,19 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, * i40iw_deinit_device - clean up the device resources * @iwdev: iwarp device * @reset: true if called before reset - * @del_hdl: true if delete hdl entry * * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses, * destroy the device queues and free the pble and the hmc objects */ -static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl) +static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset) { struct i40e_info *ldev = iwdev->ldev; struct i40iw_sc_dev *dev = &iwdev->sc_dev; i40iw_pr_info("state = %d\n", iwdev->init_state); + if (iwdev->param_wq) + destroy_workqueue(iwdev->param_wq); switch (iwdev->init_state) { case RDMA_DEV_REGISTERED: @@ -1441,10 +1475,10 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_destroy_aeq(iwdev, reset); /* fallthrough */ case IEQ_CREATED: - i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset); /* fallthrough */ case ILQ_CREATED: - i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset); + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset); /* fallthrough */ case CCQ_CREATED: i40iw_destroy_ccq(iwdev, reset); @@ -1456,13 +1490,14 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset); /* fallthrough */ case CQP_CREATED: - i40iw_destroy_cqp(iwdev, !reset); + i40iw_destroy_cqp(iwdev, true); /* fallthrough */ case INITIAL_STATE: i40iw_cleanup_cm_core(&iwdev->cm_core); - if (dev->is_pf) - i40iw_hw_stats_del_timer(dev); - + if (iwdev->vsi.pestat) { + i40iw_vsi_stats_free(&iwdev->vsi); + kfree(iwdev->vsi.pestat); + } i40iw_del_init_mem(iwdev); break; case INVALID_STATE: @@ -1472,8 +1507,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del break; } - if (del_hdl) - i40iw_del_handler(i40iw_find_i40e_handler(ldev)); + i40iw_del_handler(i40iw_find_i40e_handler(ldev)); kfree(iwdev->hdl); } @@ -1508,7 +1542,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, iwdev->max_enabled_vfs = iwdev->max_rdma_vfs; iwdev->netdev = ldev->netdev; hdl->client = client; - iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS; if (!ldev->ftype) iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET; else @@ -1528,6 +1561,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, init_waitqueue_head(&iwdev->vchnl_waitq); init_waitqueue_head(&dev->vf_reqs); + init_waitqueue_head(&iwdev->close_wq); status = i40iw_initialize_dev(iwdev, ldev); exit: @@ -1540,6 +1574,20 @@ exit: } /** + * i40iw_get_used_rsrc - determine resources used internally + * @iwdev: iwarp device + * + * Called after internal allocations + */ +static void i40iw_get_used_rsrc(struct i40iw_device *iwdev) +{ + iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0); + iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0); + iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0); + iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0); +} + +/** * i40iw_open - client interface operation open for iwarp/uda device * @ldev: lan device information * @client: iwarp client information, provided during registration @@ -1611,6 +1659,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_initialize_hw_resources(iwdev); if (status) break; + i40iw_get_used_rsrc(iwdev); dev->ccq_ops->ccq_arm(dev->ccq); status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) @@ -1630,35 +1679,73 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) iwdev->init_state = RDMA_DEV_REGISTERED; iwdev->iw_status = 1; i40iw_port_ibevent(iwdev); + iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM); + if(iwdev->param_wq == NULL) + break; i40iw_pr_info("i40iw_open completed\n"); return 0; } while (0); i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state); - i40iw_deinit_device(iwdev, false, false); + i40iw_deinit_device(iwdev, false); return -ERESTART; } /** - * i40iw_l2param_change : handle qs handles for qos and mss change + * i40iw_l2params_worker - worker for l2 params change + * @work: work pointer for l2 params + */ +static void i40iw_l2params_worker(struct work_struct *work) +{ + struct l2params_work *dwork = + container_of(work, struct l2params_work, work); + struct i40iw_device *iwdev = dwork->iwdev; + + i40iw_change_l2params(&iwdev->vsi, &dwork->l2params); + atomic_dec(&iwdev->params_busy); + kfree(work); +} + +/** + * i40iw_l2param_change - handle qs handles for qos and mss change * @ldev: lan device information * @client: client for paramater change * @params: new parameters from L2 */ -static void i40iw_l2param_change(struct i40e_info *ldev, - struct i40e_client *client, +static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client, struct i40e_params *params) { struct i40iw_handler *hdl; + struct i40iw_l2params *l2params; + struct l2params_work *work; struct i40iw_device *iwdev; + int i; hdl = i40iw_find_i40e_handler(ldev); if (!hdl) return; iwdev = &hdl->device; - if (params->mtu) - iwdev->mss = params->mtu - I40IW_MTU_TO_MSS; + + if (atomic_read(&iwdev->params_busy)) + return; + + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + atomic_inc(&iwdev->params_busy); + + work->iwdev = iwdev; + l2params = &work->l2params; + for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) + l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle; + + l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss; + + INIT_WORK(&work->work, i40iw_l2params_worker); + queue_work(iwdev->param_wq, &work->work); } /** @@ -1679,8 +1766,11 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool return; iwdev = &hdl->device; + iwdev->closing = true; + + i40iw_cm_disconnect_all(iwdev); destroy_workqueue(iwdev->virtchnl_wq); - i40iw_deinit_device(iwdev, reset, true); + i40iw_deinit_device(iwdev, reset); } /** @@ -1701,21 +1791,23 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u struct i40iw_vfdev *tmp_vfdev; unsigned int i; unsigned long flags; + struct i40iw_device *iwdev; hdl = i40iw_find_i40e_handler(ldev); if (!hdl) return; dev = &hdl->device.sc_dev; + iwdev = (struct i40iw_device *)dev->back_dev; for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) { if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id)) continue; /* free all resources allocated on behalf of vf */ tmp_vfdev = dev->vf_dev[i]; - spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); + spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags); dev->vf_dev[i] = NULL; - spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); + spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags); i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false); /* remove vf hmc function */ memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info)); diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index 80f422bf3967..aa66c1c63dfa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -198,6 +198,8 @@ enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev, void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx); void *i40iw_remove_head(struct list_head *list); +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend); +void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len); void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred); @@ -207,9 +209,9 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp); enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev, struct i40iw_manage_vf_pble_info *info, bool wait); -struct i40iw_dev_pestat; -void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *); -void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *); +struct i40iw_sc_vsi; +void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi); +void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi); #define i40iw_mmiowb() mmiowb() void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value); u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg); diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index a0b8ca10d67e..28a92fee0822 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -47,8 +47,6 @@ void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask, enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, struct i40iw_device_init_info *info); -enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *); - void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp); u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch); @@ -64,7 +62,24 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id, u32 *vf_cnt_array); -/* cqp misc functions */ +/* stats functions */ +void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats); +void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values); +void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_32b index, + u64 *value); +void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats, + enum i40iw_hw_stats_index_64b index, + u64 *value); +void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf); + +/* vsi misc functions */ +enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info); +void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi); +void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info); + +void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params); +void i40iw_qp_add_qos(struct i40iw_sc_qp *qp); void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp); diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 85993dc44f6e..c87ba1617087 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -353,10 +353,6 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev, pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD - idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD; pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT); - if (!pages) { - ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE; - goto error; - } info.chunk = chunk; info.hmc_info = hmc_info; info.pages = pages; diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index c62d354f7810..449ba8c81ce7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -42,12 +42,13 @@ #include "i40iw_p.h" #include "i40iw_puda.h" -static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, +static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *buf); -static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid); +static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid); static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx); static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc, bool initial); +static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp); /** * i40iw_puda_get_listbuf - get buffer from puda list * @list: list to use for buffers (ILQ or IEQ) @@ -292,7 +293,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, unsigned long flags; if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) { - rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq; + rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq; } else { i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__); return I40IW_ERR_BAD_PTR; @@ -335,7 +336,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, rsrc->stats_pkt_rcvd++; rsrc->compl_rxwqe_idx = info.wqe_idx; i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__); - rsrc->receive(rsrc->dev, buf); + rsrc->receive(rsrc->vsi, buf); if (cq_type == I40IW_CQ_TYPE_ILQ) i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx); else @@ -345,12 +346,12 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__); sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid; I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx); - rsrc->xmit_complete(rsrc->dev, sqwrid); + rsrc->xmit_complete(rsrc->vsi, sqwrid); spin_lock_irqsave(&rsrc->bufpool_lock, flags); rsrc->tx_wqe_avail_cnt++; spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); - if (!list_empty(&dev->ilq->txpend)) - i40iw_puda_send_buf(dev->ilq, NULL); + if (!list_empty(&rsrc->vsi->ilq->txpend)) + i40iw_puda_send_buf(rsrc->vsi->ilq, NULL); } done: @@ -513,10 +514,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) * i40iw_puda_qp_wqe - setup wqe for qp create * @rsrc: resource for qp */ -static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc) +static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { - struct i40iw_sc_qp *qp = &rsrc->qp; - struct i40iw_sc_dev *dev = rsrc->dev; struct i40iw_sc_cqp *cqp; u64 *wqe; u64 header; @@ -582,6 +581,7 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) qp->back_qp = (void *)rsrc; qp->sq_pa = mem->pa; qp->rq_pa = qp->sq_pa + sq_size; + qp->vsi = rsrc->vsi; ukqp->sq_base = mem->va; ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size]; ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem; @@ -608,15 +608,63 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) + I40E_VFPE_WQEALLOC1); - qp->qs_handle = qp->dev->qs_handle; + qp->user_pri = 0; + i40iw_qp_add_qos(qp); i40iw_puda_qp_setctx(rsrc); - ret = i40iw_puda_qp_wqe(rsrc); + if (rsrc->ceq_valid) + ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp); + else + ret = i40iw_puda_qp_wqe(rsrc->dev, qp); if (ret) i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); return ret; } /** + * i40iw_puda_cq_wqe - setup wqe for cq create + * @rsrc: resource for cq + */ +static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) +{ + u64 *wqe; + struct i40iw_sc_cqp *cqp; + u64 header; + struct i40iw_ccq_cqe_info compl_info; + enum i40iw_status_code status = 0; + + cqp = dev->cqp; + wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0); + if (!wqe) + return I40IW_ERR_RING_FULL; + + set_64bit_val(wqe, 0, cq->cq_uk.cq_size); + set_64bit_val(wqe, 8, RS_64_1(cq, 1)); + set_64bit_val(wqe, 16, + LS_64(cq->shadow_read_threshold, + I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD)); + set_64bit_val(wqe, 32, cq->cq_pa); + + set_64bit_val(wqe, 40, cq->shadow_area_pa); + + header = cq->cq_uk.cq_id | + LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) | + LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) | + LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | + LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | + LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); + set_64bit_val(wqe, 24, header); + + i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", + wqe, I40IW_CQP_WQE_SIZE * 8); + + i40iw_sc_cqp_post_sq(dev->cqp); + status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_CREATE_CQ, + &compl_info); + return status; +} + +/** * i40iw_puda_cq_create - create cq for resource * @rsrc: resource for which cq to create */ @@ -624,18 +672,13 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) { struct i40iw_sc_dev *dev = rsrc->dev; struct i40iw_sc_cq *cq = &rsrc->cq; - u64 *wqe; - struct i40iw_sc_cqp *cqp; - u64 header; enum i40iw_status_code ret = 0; u32 tsize, cqsize; - u32 shadow_read_threshold = 128; struct i40iw_dma_mem *mem; - struct i40iw_ccq_cqe_info compl_info; struct i40iw_cq_init_info info; struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info; - cq->back_cq = (void *)rsrc; + cq->vsi = rsrc->vsi; cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe)); tsize = cqsize + sizeof(struct i40iw_cq_shadow_area); ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize, @@ -656,43 +699,84 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize); init_info->cq_size = rsrc->cq_size; init_info->cq_id = rsrc->cq_id; + info.ceqe_mask = true; + info.ceq_id_valid = true; ret = dev->iw_priv_cq_ops->cq_init(cq, &info); if (ret) goto error; - cqp = dev->cqp; - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0); - if (!wqe) { - ret = I40IW_ERR_RING_FULL; - goto error; - } + if (rsrc->ceq_valid) + ret = i40iw_cqp_cq_create_cmd(dev, cq); + else + ret = i40iw_puda_cq_wqe(dev, cq); +error: + if (ret) + i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); + return ret; +} - set_64bit_val(wqe, 0, rsrc->cq_size); - set_64bit_val(wqe, 8, RS_64_1(cq, 1)); - set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD)); - set_64bit_val(wqe, 32, cq->cq_pa); +/** + * i40iw_puda_free_qp - free qp for resource + * @rsrc: resource for which qp to free + */ +static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc) +{ + enum i40iw_status_code ret; + struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = rsrc->dev; - set_64bit_val(wqe, 40, cq->shadow_area_pa); + if (rsrc->ceq_valid) { + i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp); + return; + } - header = rsrc->cq_id | - LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) | - LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) | - LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | - LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp, + 0, false, true, true); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error puda qp destroy wqe\n", + __func__); - i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", - wqe, I40IW_CQP_WQE_SIZE * 8); + if (!ret) { + ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_DESTROY_QP, + &compl_info); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error puda qp destroy failed\n", + __func__); + } +} - i40iw_sc_cqp_post_sq(dev->cqp); - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_CREATE_CQ, - &compl_info); +/** + * i40iw_puda_free_cq - free cq for resource + * @rsrc: resource for which cq to free + */ +static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) +{ + enum i40iw_status_code ret; + struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = rsrc->dev; + + if (rsrc->ceq_valid) { + i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq); + return; + } + ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true); -error: if (ret) - i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); - return ret; + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error ieq cq destroy\n", + __func__); + + if (!ret) { + ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, + I40IW_CQP_OP_DESTROY_CQ, + &compl_info); + if (ret) + i40iw_debug(dev, I40IW_DEBUG_PUDA, + "%s error ieq qp destroy done\n", + __func__); + } } /** @@ -701,25 +785,24 @@ error: * @type: type of resource to dele * @reset: true if reset chip */ -void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, +void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi, enum puda_resource_type type, bool reset) { - struct i40iw_ccq_cqe_info compl_info; + struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_puda_rsrc *rsrc; struct i40iw_puda_buf *buf = NULL; struct i40iw_puda_buf *nextbuf = NULL; struct i40iw_virt_mem *vmem; - enum i40iw_status_code ret; switch (type) { case I40IW_PUDA_RSRC_TYPE_ILQ: - rsrc = dev->ilq; - vmem = &dev->ilq_mem; + rsrc = vsi->ilq; + vmem = &vsi->ilq_mem; break; case I40IW_PUDA_RSRC_TYPE_IEQ: - rsrc = dev->ieq; - vmem = &dev->ieq_mem; + rsrc = vsi->ieq; + vmem = &vsi->ieq_mem; break; default: i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n", @@ -731,45 +814,14 @@ void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, case PUDA_HASH_CRC_COMPLETE: i40iw_free_hash_desc(rsrc->hash_desc); case PUDA_QP_CREATED: - do { - if (reset) - break; - ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp, - 0, false, true, true); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy\n", - __func__); - - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_DESTROY_QP, - &compl_info); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy done\n", - __func__); - } while (0); + if (!reset) + i40iw_puda_free_qp(rsrc); i40iw_free_dma_mem(dev->hw, &rsrc->qpmem); /* fallthrough */ case PUDA_CQ_CREATED: - do { - if (reset) - break; - ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq cq destroy\n", - __func__); - - ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp, - I40IW_CQP_OP_DESTROY_CQ, - &compl_info); - if (ret) - i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, - "%s error ieq qp destroy done\n", - __func__); - } while (0); + if (!reset) + i40iw_puda_free_cq(rsrc); i40iw_free_dma_mem(dev->hw, &rsrc->cqmem); break; @@ -825,9 +877,10 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, * @dev: iwarp device * @info: resource information */ -enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, +enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, struct i40iw_puda_rsrc_info *info) { + struct i40iw_sc_dev *dev = vsi->dev; enum i40iw_status_code ret = 0; struct i40iw_puda_rsrc *rsrc; u32 pudasize; @@ -840,10 +893,10 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rqwridsize = info->rq_size * 8; switch (info->type) { case I40IW_PUDA_RSRC_TYPE_ILQ: - vmem = &dev->ilq_mem; + vmem = &vsi->ilq_mem; break; case I40IW_PUDA_RSRC_TYPE_IEQ: - vmem = &dev->ieq_mem; + vmem = &vsi->ieq_mem; break; default: return I40IW_NOT_SUPPORTED; @@ -856,22 +909,22 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rsrc = (struct i40iw_puda_rsrc *)vmem->va; spin_lock_init(&rsrc->bufpool_lock); if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) { - dev->ilq = (struct i40iw_puda_rsrc *)vmem->va; - dev->ilq_count = info->count; + vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va; + vsi->ilq_count = info->count; rsrc->receive = info->receive; rsrc->xmit_complete = info->xmit_complete; } else { - vmem = &dev->ieq_mem; - dev->ieq_count = info->count; - dev->ieq = (struct i40iw_puda_rsrc *)vmem->va; + vmem = &vsi->ieq_mem; + vsi->ieq_count = info->count; + vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va; rsrc->receive = i40iw_ieq_receive; rsrc->xmit_complete = i40iw_ieq_tx_compl; } + rsrc->ceq_valid = info->ceq_valid; rsrc->type = info->type; rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize); rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize); - rsrc->mss = info->mss; /* Initialize all ieq lists */ INIT_LIST_HEAD(&rsrc->bufpool); INIT_LIST_HEAD(&rsrc->txpend); @@ -885,6 +938,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, rsrc->cq_size = info->rq_size + info->sq_size; rsrc->buf_size = info->buf_size; rsrc->dev = dev; + rsrc->vsi = vsi; ret = i40iw_puda_cq_create(rsrc); if (!ret) { @@ -919,7 +973,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, dev->ccq_ops->ccq_arm(&rsrc->cq); return ret; error: - i40iw_puda_dele_resources(dev, info->type, false); + i40iw_puda_dele_resources(vsi, info->type, false); return ret; } @@ -1131,7 +1185,7 @@ static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *i list_add(&buf->list, &pbufl); status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len); - if (!status) + if (status) goto error; txbuf = i40iw_puda_get_bufpool(ieq); @@ -1332,7 +1386,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, } if (pfpdu->mode && (fps != pfpdu->fps)) { /* clean up qp as it is new partial sequence */ - i40iw_ieq_cleanup_qp(ieq->dev, qp); + i40iw_ieq_cleanup_qp(ieq, qp); i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ, "%s: restarting new partial\n", __func__); pfpdu->mode = false; @@ -1344,7 +1398,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, pfpdu->rcv_nxt = fps; pfpdu->fps = fps; pfpdu->mode = true; - pfpdu->max_fpdu_data = ieq->mss; + pfpdu->max_fpdu_data = ieq->vsi->mss; pfpdu->pmode_count++; INIT_LIST_HEAD(rxlist); i40iw_ieq_check_first_buf(buf, fps); @@ -1379,14 +1433,14 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, * @dev: iwarp device * @buf: exception buffer received */ -static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, +static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *buf) { - struct i40iw_puda_rsrc *ieq = dev->ieq; + struct i40iw_puda_rsrc *ieq = vsi->ieq; struct i40iw_sc_qp *qp = NULL; u32 wqe_idx = ieq->compl_rxwqe_idx; - qp = i40iw_ieq_get_qp(dev, buf); + qp = i40iw_ieq_get_qp(vsi->dev, buf); if (!qp) { ieq->stats_bad_qp_id++; i40iw_puda_ret_bufpool(ieq, buf); @@ -1404,12 +1458,12 @@ static void i40iw_ieq_receive(struct i40iw_sc_dev *dev, /** * i40iw_ieq_tx_compl - put back after sending completed exception buffer - * @dev: iwarp device + * @vsi: pointer to the vsi structure * @sqwrid: pointer to puda buffer */ -static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid) +static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid) { - struct i40iw_puda_rsrc *ieq = dev->ieq; + struct i40iw_puda_rsrc *ieq = vsi->ieq; struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid; i40iw_puda_ret_bufpool(ieq, buf); @@ -1421,15 +1475,14 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid) /** * i40iw_ieq_cleanup_qp - qp is being destroyed - * @dev: iwarp device + * @ieq: ieq resource * @qp: all pending fpdu buffers */ -void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp) { struct i40iw_puda_buf *buf; struct i40iw_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; - struct i40iw_puda_rsrc *ieq = dev->ieq; if (!pfpdu->mode) return; diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h index 52bf7826ce4e..dba05ce7d392 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.h +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h @@ -100,6 +100,7 @@ struct i40iw_puda_rsrc_info { enum puda_resource_type type; /* ILQ or IEQ */ u32 count; u16 pd_id; + bool ceq_valid; u32 cq_id; u32 qp_id; u32 sq_size; @@ -107,8 +108,8 @@ struct i40iw_puda_rsrc_info { u16 buf_size; u16 mss; u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */ - void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *); - void (*xmit_complete)(struct i40iw_sc_dev *, void *); + void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *); + void (*xmit_complete)(struct i40iw_sc_vsi *, void *); }; struct i40iw_puda_rsrc { @@ -116,6 +117,7 @@ struct i40iw_puda_rsrc { struct i40iw_sc_qp qp; struct i40iw_sc_pd sc_pd; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; struct i40iw_dma_mem cqmem; struct i40iw_dma_mem qpmem; struct i40iw_virt_mem ilq_mem; @@ -123,6 +125,7 @@ struct i40iw_puda_rsrc { enum puda_resource_type type; u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */ u16 mss; + bool ceq_valid; u32 cq_id; u32 qp_id; u32 sq_size; @@ -142,8 +145,8 @@ struct i40iw_puda_rsrc { u32 avail_buf_count; /* snapshot of currently available buffers */ spinlock_t bufpool_lock; struct i40iw_puda_buf *alloclist; - void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *); - void (*xmit_complete)(struct i40iw_sc_dev *, void *); + void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *); + void (*xmit_complete)(struct i40iw_sc_vsi *, void *); /* puda stats */ u64 stats_buf_alloc_fail; u64 stats_pkt_rcvd; @@ -160,14 +163,13 @@ void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf); enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp, struct i40iw_puda_send_info *info); -enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev, +enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, struct i40iw_puda_rsrc_info *info); -void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev, +void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi, enum puda_resource_type type, bool reset); enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq, u32 *compl_err); -void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf); @@ -180,4 +182,8 @@ void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_free_hash_desc(struct shash_desc *desc); void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum); +enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); +enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); +void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); +void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 2b1a04e9ca3c..f3f8e9cc3c05 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -61,7 +61,7 @@ struct i40iw_cq_shadow_area { struct i40iw_sc_dev; struct i40iw_hmc_info; -struct i40iw_dev_pestat; +struct i40iw_vsi_pestat; struct i40iw_cqp_ops; struct i40iw_ccq_ops; @@ -74,6 +74,11 @@ struct i40iw_priv_qp_ops; struct i40iw_priv_cq_ops; struct i40iw_hmc_ops; +enum i40iw_page_size { + I40IW_PAGE_SIZE_4K, + I40IW_PAGE_SIZE_2M +}; + enum i40iw_resource_indicator_type { I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0, I40IW_RSRC_INDICATOR_TYPE_CQ, @@ -186,7 +191,7 @@ enum i40iw_debug_flag { I40IW_DEBUG_ALL = 0xFFFFFFFF }; -enum i40iw_hw_stat_index_32b { +enum i40iw_hw_stats_index_32b { I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0, I40IW_HW_STAT_INDEX_IP4RXTRUNC, I40IW_HW_STAT_INDEX_IP4TXNOROUTE, @@ -199,7 +204,7 @@ enum i40iw_hw_stat_index_32b { I40IW_HW_STAT_INDEX_MAX_32 }; -enum i40iw_hw_stat_index_64b { +enum i40iw_hw_stats_index_64b { I40IW_HW_STAT_INDEX_IP4RXOCTS = 0, I40IW_HW_STAT_INDEX_IP4RXPKTS, I40IW_HW_STAT_INDEX_IP4RXFRAGS, @@ -229,32 +234,23 @@ enum i40iw_hw_stat_index_64b { I40IW_HW_STAT_INDEX_MAX_64 }; -struct i40iw_dev_hw_stat_offsets { - u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32]; - u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64]; +struct i40iw_dev_hw_stats_offsets { + u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32]; + u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64]; }; struct i40iw_dev_hw_stats { - u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32]; - u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64]; -}; - -struct i40iw_device_pestat_ops { - void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool); - void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *); - void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *); - void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *); - void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *); + u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32]; + u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64]; }; -struct i40iw_dev_pestat { +struct i40iw_vsi_pestat { struct i40iw_hw *hw; - struct i40iw_device_pestat_ops ops; struct i40iw_dev_hw_stats hw_stats; struct i40iw_dev_hw_stats last_read_hw_stats; - struct i40iw_dev_hw_stat_offsets hw_stat_offsets; + struct i40iw_dev_hw_stats_offsets hw_stats_offsets; struct timer_list stats_timer; - spinlock_t stats_lock; /* rdma stats lock */ + spinlock_t lock; /* rdma stats lock */ }; struct i40iw_hw { @@ -350,6 +346,7 @@ struct i40iw_sc_cq { u64 cq_pa; u64 shadow_area_pa; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; void *pbl_list; void *back_cq; u32 ceq_id; @@ -373,6 +370,7 @@ struct i40iw_sc_qp { u64 shadow_area_pa; u64 q2_pa; struct i40iw_sc_dev *dev; + struct i40iw_sc_vsi *vsi; struct i40iw_sc_pd *pd; u64 *hw_host_ctx; void *llp_stream_handle; @@ -397,6 +395,9 @@ struct i40iw_sc_qp { bool virtual_map; bool flush_sq; bool flush_rq; + u8 user_pri; + struct list_head list; + bool on_qoslist; bool sq_flush; enum i40iw_flush_opcode flush_code; enum i40iw_term_eventtypes eventtype; @@ -424,10 +425,16 @@ struct i40iw_vchnl_vf_msg_buffer { char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1]; }; +struct i40iw_qos { + struct list_head qplist; + spinlock_t lock; /* qos list */ + u16 qs_handle; +}; + struct i40iw_vfdev { struct i40iw_sc_dev *pf_dev; u8 *hmc_info_mem; - struct i40iw_dev_pestat dev_pestat; + struct i40iw_vsi_pestat pestat; struct i40iw_hmc_pble_info *pble_info; struct i40iw_hmc_info hmc_info; struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer; @@ -441,11 +448,28 @@ struct i40iw_vfdev { bool stats_initialized; }; +#define I40IW_INVALID_FCN_ID 0xff +struct i40iw_sc_vsi { + struct i40iw_sc_dev *dev; + void *back_vsi; /* Owned by OS */ + u32 ilq_count; + struct i40iw_virt_mem ilq_mem; + struct i40iw_puda_rsrc *ilq; + u32 ieq_count; + struct i40iw_virt_mem ieq_mem; + struct i40iw_puda_rsrc *ieq; + u16 mss; + u8 fcn_id; + bool stats_fcn_id_alloc; + struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY]; + struct i40iw_vsi_pestat *pestat; +}; + struct i40iw_sc_dev { struct list_head cqp_cmd_head; /* head of the CQP command list */ spinlock_t cqp_lock; /* cqp list sync */ struct i40iw_dev_uk dev_uk; - struct i40iw_dev_pestat dev_pestat; + bool fcn_id_array[I40IW_MAX_STATS_COUNT]; struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT]; u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; @@ -472,17 +496,9 @@ struct i40iw_sc_dev { struct i40iw_cqp_misc_ops *cqp_misc_ops; struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; - u32 ilq_count; - struct i40iw_virt_mem ilq_mem; - struct i40iw_puda_rsrc *ilq; - u32 ieq_count; - struct i40iw_virt_mem ieq_mem; - struct i40iw_puda_rsrc *ieq; - const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; - u16 qs_handle; u32 debug_mask; u16 exception_lan_queue; u8 hmc_fn_id; @@ -556,6 +572,19 @@ struct i40iw_l2params { u16 mss; }; +struct i40iw_vsi_init_info { + struct i40iw_sc_dev *dev; + void *back_vsi; + struct i40iw_l2params *params; +}; + +struct i40iw_vsi_stats_info { + struct i40iw_vsi_pestat *pestat; + u8 fcn_id; + bool alloc_fcn_id; + bool stats_initialize; +}; + struct i40iw_device_init_info { u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; @@ -564,7 +593,6 @@ struct i40iw_device_init_info { struct i40iw_hw *hw; void __iomem *bar0; enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16); - u16 qs_handle; u16 exception_lan_queue; u8 hmc_fn_id; bool is_pf; @@ -722,6 +750,8 @@ struct i40iw_qp_host_ctx_info { bool iwarp_info_valid; bool err_rq_idx_valid; u16 err_rq_idx; + bool add_to_qoslist; + u8 user_pri; }; struct i40iw_aeqe_info { @@ -814,6 +844,7 @@ struct i40iw_register_shared_stag { struct i40iw_qp_init_info { struct i40iw_qp_uk_init_info qp_uk_init_info; struct i40iw_sc_pd *pd; + struct i40iw_sc_vsi *vsi; u64 *host_ctx; u8 *q2; u64 sq_pa; @@ -880,13 +911,14 @@ enum i40iw_quad_hash_manage_type { }; struct i40iw_qhash_table_info { + struct i40iw_sc_vsi *vsi; enum i40iw_quad_hash_manage_type manage; enum i40iw_quad_entry_type entry_type; bool vlan_valid; bool ipv4_valid; u8 mac_addr[6]; u16 vlan_id; - u16 qs_handle; + u8 user_pri; u32 qp_num; u32 dest_ip[4]; u32 src_ip[4]; @@ -976,7 +1008,7 @@ struct i40iw_cqp_query_fpm_values { struct i40iw_cqp_ops { enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *, struct i40iw_cqp_init_info *); - enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *); + enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *); void (*cqp_post_sq)(struct i40iw_sc_cqp *); u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch); enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *); diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 4d28c3cb03cc..4376cd628774 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -175,12 +175,10 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; } - - for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) { - I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); - if (ret_code) - return NULL; - } + I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring, + wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code); + if (ret_code) + return NULL; wqe = qp->sq_base[*wqe_idx].elem; @@ -430,7 +428,7 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, struct i40iw_inline_rdma_write *op_info; u64 *push; u64 header = 0; - u32 i, wqe_idx; + u32 wqe_idx; enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; @@ -465,14 +463,12 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, src = (u8 *)(op_info->data); if (op_info->len <= 16) { - for (i = 0; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len); } else { - for (i = 0; i < 16; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, 16); + src += 16; dest = (u8 *)wqe + 32; - for (; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len - 16); } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -507,7 +503,7 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, u8 *dest, *src; struct i40iw_post_inline_send *op_info; u64 header; - u32 wqe_idx, i; + u32 wqe_idx; enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; @@ -540,14 +536,12 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, src = (u8 *)(op_info->data); if (op_info->len <= 16) { - for (i = 0; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len); } else { - for (i = 0; i < 16; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, 16); + src += 16; dest = (u8 *)wqe + 32; - for (; i < op_info->len; i++, src++, dest++) - *dest = *src; + memcpy(dest, src, op_info->len - 16); } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -1190,12 +1184,8 @@ enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, if (data_size <= 16) *wqe_size = I40IW_QP_WQE_MIN_SIZE; - else if (data_size <= 48) - *wqe_size = 64; - else if (data_size <= 80) - *wqe_size = 96; else - *wqe_size = 128; + *wqe_size = 64; return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 276bcefffd7e..80d9f464f65e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -72,12 +72,12 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, - I40IW_MAX_IRD_SIZE = 32, - I40IW_QPCTX_ENCD_MAXIRD = 3, + I40IW_MAX_IRD_SIZE = 63, + I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, - I40IW_MAX_ORD_SIZE = 32, I40IW_Q2_BUFFER_SIZE = (248 + 100), - I40IW_QP_CTX_SIZE = 248 + I40IW_QP_CTX_SIZE = 248, + I40IW_MAX_PDS = 32768 }; #define i40iw_handle void * @@ -96,12 +96,6 @@ enum i40iw_device_capabilities_const { #define i40iw_physical_fragment u64 #define i40iw_address_list u64 * -#define I40IW_CREATE_STAG(index, key) (((index) << 8) + (key)) - -#define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF) - -#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8) - #define I40IW_MAX_MR_SIZE 0x10000000000L struct i40iw_qp_uk; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 6fd043b1d714..0f5d43d1f5fc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -153,6 +153,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, struct i40iw_device *iwdev; struct i40iw_handler *hdl; u32 local_ipaddr; + u32 action = I40IW_ARP_ADD; hdl = i40iw_find_netdev(event_netdev); if (!hdl) @@ -164,44 +165,25 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, if (netdev != event_netdev) return NOTIFY_DONE; + if (upper_dev) + local_ipaddr = ntohl( + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); + else + local_ipaddr = ntohl(ifa->ifa_address); switch (event) { case NETDEV_DOWN: - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - &local_ipaddr, - true, - I40IW_ARP_DELETE); - return NOTIFY_OK; + action = I40IW_ARP_DELETE; + /* Fall through */ case NETDEV_UP: - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - &local_ipaddr, - true, - I40IW_ARP_ADD); - break; + /* Fall through */ case NETDEV_CHANGEADDR: - /* Add the address to the IP table */ - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else - local_ipaddr = ntohl(ifa->ifa_address); - i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, true, - I40IW_ARP_ADD); + action); + i40iw_if_notify(iwdev, netdev, &local_ipaddr, true, + (action == I40IW_ARP_ADD) ? true : false); break; default: break; @@ -225,6 +207,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, struct i40iw_device *iwdev; struct i40iw_handler *hdl; u32 local_ipaddr6[4]; + u32 action = I40IW_ARP_ADD; hdl = i40iw_find_netdev(event_netdev); if (!hdl) @@ -235,24 +218,21 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, if (netdev != event_netdev) return NOTIFY_DONE; + i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); switch (event) { case NETDEV_DOWN: - i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); - i40iw_manage_arp_cache(iwdev, - netdev->dev_addr, - local_ipaddr6, - false, - I40IW_ARP_DELETE); - return NOTIFY_OK; + action = I40IW_ARP_DELETE; + /* Fall through */ case NETDEV_UP: /* Fall through */ case NETDEV_CHANGEADDR: - i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); i40iw_manage_arp_cache(iwdev, netdev->dev_addr, local_ipaddr6, false, - I40IW_ARP_ADD); + action); + i40iw_if_notify(iwdev, netdev, local_ipaddr6, false, + (action == I40IW_ARP_ADD) ? true : false); break; default: break; @@ -392,6 +372,7 @@ static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) i40iw_rem_pdusecount(iwqp->iwpd, iwdev); i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_rem_devusecount(iwdev); } /** @@ -415,7 +396,10 @@ static int i40iw_wait_event(struct i40iw_device *iwdev, i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n", info->cqp_cmd, timeout_ret); err_code = -ETIME; - i40iw_request_reset(iwdev); + if (!iwdev->reset) { + iwdev->reset = true; + i40iw_request_reset(iwdev); + } goto done; } cqp_error = cqp_request->compl_info.error; @@ -445,6 +429,11 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, struct cqp_commands_info *info = &cqp_request->info; int err_code = 0; + if (iwdev->reset) { + i40iw_free_cqp_request(&iwdev->cqp, cqp_request); + return I40IW_ERR_CQP_COMPL_ERROR; + } + status = i40iw_process_cqp_cmd(dev, info); if (status) { i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd); @@ -459,6 +448,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev, } /** + * i40iw_add_devusecount - add dev refcount + * @iwdev: dev for refcount + */ +void i40iw_add_devusecount(struct i40iw_device *iwdev) +{ + atomic64_inc(&iwdev->use_count); +} + +/** + * i40iw_rem_devusecount - decrement refcount for dev + * @iwdev: device + */ +void i40iw_rem_devusecount(struct i40iw_device *iwdev) +{ + if (!atomic64_dec_and_test(&iwdev->use_count)) + return; + wake_up(&iwdev->close_wq); +} + +/** * i40iw_add_pdusecount - add pd refcount * @iwpd: pd for refcount */ @@ -712,6 +721,51 @@ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, } /** + * i40iw_qp_suspend_resume - cqp command for suspend/resume + * @dev: hardware control device structure + * @qp: hardware control qp + * @suspend: flag if suspend or resume + */ +void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp_request *cqp_request; + struct i40iw_sc_cqp *cqp = dev->cqp; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME; + cqp_info->in.u.suspend_resume.cqp = cqp; + cqp_info->in.u.suspend_resume.qp = qp; + cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP QP Suspend/Resume fail"); +} + +/** + * i40iw_qp_mss_modify - modify mss for qp + * @dev: hardware control device structure + * @qp: hardware control qp + */ +void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; + struct i40iw_modify_qp_info info; + + memset(&info, 0, sizeof(info)); + info.mss_change = true; + info.new_mss = qp->vsi->mss; + i40iw_hw_modify_qp(iwdev, iwqp, &info, false); +} + +/** * i40iw_term_modify_qp - modify qp for term message * @qp: hardware control qp * @next_state: qp's next state @@ -769,6 +823,7 @@ static void i40iw_terminate_timeout(unsigned long context) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); + i40iw_rem_ref(&iwqp->ibqp); } /** @@ -780,6 +835,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; + i40iw_add_ref(&iwqp->ibqp); init_timer(&iwqp->terminate_timer); iwqp->terminate_timer.function = i40iw_terminate_timeout; iwqp->terminate_timer.expires = jiffies + HZ; @@ -796,7 +852,8 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - del_timer(&iwqp->terminate_timer); + if (del_timer(&iwqp->terminate_timer)) + i40iw_rem_ref(&iwqp->ibqp); } /** @@ -1011,6 +1068,116 @@ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) } /** + * i40iw_cqp_cq_create_cmd - create a cq for the cqp + * @dev: device pointer + * @cq: pointer to created cq + */ +enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, + struct i40iw_sc_cq *cq) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = OP_CQ_CREATE; + cqp_info->post_sq = 1; + cqp_info->in.u.cq_create.cq = cq; + cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP Create QP fail"); + + return status; +} + +/** + * i40iw_cqp_qp_create_cmd - create a qp for the cqp + * @dev: device pointer + * @qp: pointer to created qp + */ +enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, + struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + struct i40iw_create_qp_info *qp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; + + cqp_info = &cqp_request->info; + qp_info = &cqp_request->info.in.u.qp_create.info; + + memset(qp_info, 0, sizeof(*qp_info)); + + qp_info->cq_num_valid = true; + qp_info->next_iwarp_state = I40IW_QP_STATE_RTS; + + cqp_info->cqp_cmd = OP_QP_CREATE; + cqp_info->post_sq = 1; + cqp_info->in.u.qp_create.qp = qp; + cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP-OP QP create fail"); + return status; +} + +/** + * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq + * @dev: device pointer + * @cq: pointer to cq + */ +void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + + i40iw_cq_wq_destroy(iwdev, cq); +} + +/** + * i40iw_cqp_qp_destroy_cmd - destroy the cqp + * @dev: device pointer + * @qp: pointer to qp + */ +void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + enum i40iw_status_code status; + + cqp_request = i40iw_get_cqp_request(iwcqp, true); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + memset(cqp_info, 0, sizeof(*cqp_info)); + + cqp_info->cqp_cmd = OP_QP_DESTROY; + cqp_info->post_sq = 1; + cqp_info->in.u.qp_destroy.qp = qp; + cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; + cqp_info->in.u.qp_destroy.remove_hash_idx = true; + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) + i40iw_pr_err("CQP QP_DESTROY fail"); +} + + +/** * i40iw_ieq_mpa_crc_ae - generate AE for crc error * @dev: hardware control device structure * @qp: hardware control qp @@ -1208,7 +1375,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in buf->totallen = pkt_len + buf->maclen; - if (info->payload_len < buf->totallen - 4) { + if (info->payload_len < buf->totallen) { i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n", info->payload_len, buf->totallen); return I40IW_ERR_INVALID_SIZE; @@ -1224,27 +1391,29 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @dev: hardware control device structure + * @vsi: pointer to the vsi structure */ -static void i40iw_hw_stats_timeout(unsigned long dev) +static void i40iw_hw_stats_timeout(unsigned long vsi) { - struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev; - struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat; - struct i40iw_dev_pestat *vf_devstat = NULL; + struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi; + struct i40iw_sc_dev *pf_dev = sc_vsi->dev; + struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat; + struct i40iw_vsi_pestat *vf_devstat = NULL; u16 iw_vf_idx; unsigned long flags; /*PF*/ - pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats); + i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats); + for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { - spin_lock_irqsave(&pf_devstat->stats_lock, flags); + spin_lock_irqsave(&pf_devstat->lock, flags); if (pf_dev->vf_dev[iw_vf_idx]) { if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) { - vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat; - vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats); + vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat; + i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats); } } - spin_unlock_irqrestore(&pf_devstat->stats_lock, flags); + spin_unlock_irqrestore(&pf_devstat->lock, flags); } mod_timer(&pf_devstat->stats_timer, @@ -1253,26 +1422,26 @@ static void i40iw_hw_stats_timeout(unsigned long dev) /** * i40iw_hw_stats_start_timer - Start periodic stats timer - * @dev: hardware control device structure + * @vsi: pointer to the vsi structure */ -void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev) +void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi) { - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = vsi->pestat; init_timer(&devstat->stats_timer); devstat->stats_timer.function = i40iw_hw_stats_timeout; - devstat->stats_timer.data = (unsigned long)dev; + devstat->stats_timer.data = (unsigned long)vsi; mod_timer(&devstat->stats_timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY)); } /** - * i40iw_hw_stats_del_timer - Delete periodic stats timer - * @dev: hardware control device structure + * i40iw_hw_stats_stop_timer - Delete periodic stats timer + * @vsi: pointer to the vsi structure */ -void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev) +void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi) { - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = vsi->pestat; del_timer_sync(&devstat->stats_timer); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22f..7368a50bbdaa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -37,6 +37,7 @@ #include <linux/random.h> #include <linux/highmem.h> #include <linux/time.h> +#include <linux/hugetlb.h> #include <asm/byteorder.h> #include <net/ip.h> #include <rdma/ib_verbs.h> @@ -67,13 +68,13 @@ static int i40iw_query_device(struct ib_device *ibdev, props->vendor_part_id = iwdev->ldev->pcidev->device; props->hw_ver = (u32)iwdev->sc_dev.hw_rev; props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; - props->max_qp = iwdev->max_qp; + props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1; props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; - props->max_cq = iwdev->max_cq; + props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; - props->max_mr = iwdev->max_mr; - props->max_pd = iwdev->max_pd; + props->max_mr = iwdev->max_mr - iwdev->used_mrs; + props->max_pd = iwdev->max_pd - iwdev->used_pds; props->max_sge_rd = I40IW_MAX_SGE_RD; props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; props->max_qp_init_rd_atom = props->max_qp_rd_atom; @@ -254,7 +255,6 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp { struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - struct i40iw_sc_dev *dev = &iwdev->sc_dev; enum i40iw_status_code status; if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) @@ -270,7 +270,7 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; cqp_info->post_sq = 1; - cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle; + cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 0; cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; @@ -292,7 +292,6 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ { struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - struct i40iw_sc_dev *dev = &iwdev->sc_dev; enum i40iw_status_code status; if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) @@ -307,7 +306,7 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ cqp_info->post_sq = 1; cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle; + cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 1; cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; @@ -337,6 +336,9 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, u32 pd_id = 0; int err; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds, iwdev->max_pd, &pd_id, &iwdev->next_pd); if (err) { @@ -602,6 +604,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, struct i40iwarp_offload_info *iwarp_info; unsigned long flags; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + if (init_attr->create_flags) return ERR_PTR(-EINVAL); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) @@ -610,11 +615,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) + init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + memset(&init_info, 0, sizeof(init_info)); sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; + init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.sq_size = sq_size; init_info.qp_uk_init_info.rq_size = rq_size; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; @@ -774,6 +783,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); + i40iw_add_devusecount(iwdev); if (ibpd->uobject && udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; @@ -815,8 +825,9 @@ static int i40iw_query_qp(struct ib_qp *ibqp, attr->qp_access_flags = 0; attr->cap.max_send_wr = qp->qp_uk.sq_size; attr->cap.max_recv_wr = qp->qp_uk.rq_size; - attr->cap.max_recv_sge = 1; attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; + attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; @@ -884,6 +895,11 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { + if (iwdev->closing && attr->qp_state != IB_QPS_ERR) { + err = -EINVAL; + goto exit; + } + switch (attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: @@ -944,7 +960,7 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto exit; } if (iwqp->sc_qp.term_flags) - del_timer(&iwqp->terminate_timer); + i40iw_terminate_del_timer(&iwqp->sc_qp); info.next_iwarp_state = I40IW_QP_STATE_ERROR; if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) && iwdev->iw_status && @@ -1037,11 +1053,11 @@ static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq) } /** - * cq_wq_destroy - send cq destroy cqp + * i40iw_cq_wq_destroy - send cq destroy cqp * @iwdev: iwarp device * @cq: hardware control cq */ -static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) +void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) { enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; @@ -1080,9 +1096,10 @@ static int i40iw_destroy_cq(struct ib_cq *ib_cq) iwcq = to_iwcq(ib_cq); iwdev = to_iwdev(ib_cq->device); cq = &iwcq->sc_cq; - cq_wq_destroy(iwdev, cq); + i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); kfree(iwcq); + i40iw_rem_devusecount(iwdev); return 0; } @@ -1113,6 +1130,9 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, int err_code; int entries = attr->cqe; + if (iwdev->closing) + return ERR_PTR(-ENODEV); + if (entries > iwdev->max_cqe) return ERR_PTR(-EINVAL); @@ -1137,7 +1157,8 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, ukinfo->cq_id = cq_num; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; info.ceqe_mask = 0; - info.ceq_id = 0; + if (attr->comp_vector < iwdev->ceqs_count) + info.ceq_id = attr->comp_vector; info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = I40IW_CQ_TYPE_IWARP; @@ -1229,10 +1250,11 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, } } + i40iw_add_devusecount(iwdev); return (struct ib_cq *)iwcq; cq_destroy: - cq_wq_destroy(iwdev, cq); + i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources: cq_free_resources(iwdev, iwcq); error: @@ -1266,6 +1288,7 @@ static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag) stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT; i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx); + i40iw_rem_devusecount(iwdev); } /** @@ -1296,19 +1319,18 @@ static u32 i40iw_create_stag(struct i40iw_device *iwdev) stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT; stag |= driver_key; stag += (u32)consumer_key; + i40iw_add_devusecount(iwdev); } return stag; } /** * i40iw_next_pbl_addr - Get next pbl address - * @palloc: Poiner to allocated pbles * @pbl: pointer to a pble * @pinfo: info pointer * @idx: index */ -static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc, - u64 *pbl, +static inline u64 *i40iw_next_pbl_addr(u64 *pbl, struct i40iw_pble_info **pinfo, u32 *idx) { @@ -1336,9 +1358,11 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; struct i40iw_pble_info *pinfo; struct scatterlist *sg; + u64 pg_addr = 0; u32 idx = 0; pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf; + pg_shift = ffs(region->page_size) - 1; for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { chunk_pages = sg_dma_len(sg) >> pg_shift; @@ -1346,17 +1370,96 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, !iwpbl->qp_mr.sq_page) iwpbl->qp_mr.sq_page = sg_page(sg); for (i = 0; i < chunk_pages; i++) { - *pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i); - pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx); + pg_addr = sg_dma_address(sg) + region->page_size * i; + + if ((entry + i) == 0) + *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); + else if (!(pg_addr & ~iwmr->page_msk)) + *pbl = cpu_to_le64(pg_addr); + else + continue; + pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); + } + } +} + +/** + * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values. + * @addr: virtual address + * @iwmr: mr pointer for this memory registration + */ +static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) +{ + struct vm_area_struct *vma; + struct hstate *h; + + vma = find_vma(current->mm, addr); + if (vma && is_vm_hugetlb_page(vma)) { + h = hstate_vma(vma); + if (huge_page_size(h) == 0x200000) { + iwmr->page_size = huge_page_size(h); + iwmr->page_msk = huge_page_mask(h); } } } /** + * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous + * @arr: lvl1 pbl array + * @npages: page count + * pg_size: page size + * + */ +static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) +{ + u32 pg_idx; + + for (pg_idx = 0; pg_idx < npages; pg_idx++) { + if ((*arr + (pg_size * pg_idx)) != arr[pg_idx]) + return false; + } + return true; +} + +/** + * i40iw_check_mr_contiguous - check if MR is physically contiguous + * @palloc: pbl allocation struct + * pg_size: page size + */ +static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) +{ + struct i40iw_pble_level2 *lvl2 = &palloc->level2; + struct i40iw_pble_info *leaf = lvl2->leaf; + u64 *arr = NULL; + u64 *start_addr = NULL; + int i; + bool ret; + + if (palloc->level == I40IW_LEVEL_1) { + arr = (u64 *)palloc->level1.addr; + ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size); + return ret; + } + + start_addr = (u64 *)leaf->addr; + + for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) { + arr = (u64 *)leaf->addr; + if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr) + return false; + ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size); + if (!ret) + return false; + } + + return true; +} + +/** * i40iw_setup_pbles - copy user pg address to pble's * @iwdev: iwarp device * @iwmr: mr pointer for this memory registration - * @use_pbles: flag if to use pble's or memory (level 0) + * @use_pbles: flag if to use pble's */ static int i40iw_setup_pbles(struct i40iw_device *iwdev, struct i40iw_mr *iwmr, @@ -1369,9 +1472,6 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev, enum i40iw_status_code status; enum i40iw_pble_level level = I40IW_LEVEL_1; - if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS)) - return -ENOMEM; - if (use_pbles) { mutex_lock(&iwdev->pbl_mutex); status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); @@ -1388,6 +1488,10 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev, } i40iw_copy_user_pgaddrs(iwmr, pbl, level); + + if (use_pbles) + iwmr->pgaddrmem[0] = *pbl; + return 0; } @@ -1409,14 +1513,18 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev, struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr; struct i40iw_hmc_pble *hmc_p; u64 *arr = iwmr->pgaddrmem; + u32 pg_size; int err; int total; + bool ret = true; total = req->sq_pages + req->rq_pages + req->cq_pages; + pg_size = iwmr->page_size; err = i40iw_setup_pbles(iwdev, iwmr, use_pbles); if (err) return err; + if (use_pbles && (palloc->level != I40IW_LEVEL_1)) { i40iw_free_pble(iwdev->pble_rsrc, palloc); iwpbl->pbl_allocated = false; @@ -1425,26 +1533,44 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev, if (use_pbles) arr = (u64 *)palloc->level1.addr; - if (req->reg_type == IW_MEMREG_TYPE_QP) { + + if (iwmr->type == IW_MEMREG_TYPE_QP) { hmc_p = &qpmr->sq_pbl; qpmr->shadow = (dma_addr_t)arr[total]; + if (use_pbles) { + ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size); + if (ret) + ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size); + } + + if (!ret) { hmc_p->idx = palloc->level1.idx; hmc_p = &qpmr->rq_pbl; hmc_p->idx = palloc->level1.idx + req->sq_pages; } else { hmc_p->addr = arr[0]; hmc_p = &qpmr->rq_pbl; - hmc_p->addr = arr[1]; + hmc_p->addr = arr[req->sq_pages]; } } else { /* CQ */ hmc_p = &cqmr->cq_pbl; cqmr->shadow = (dma_addr_t)arr[total]; + if (use_pbles) + ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size); + + if (!ret) hmc_p->idx = palloc->level1.idx; else hmc_p->addr = arr[0]; } + + if (use_pbles && ret) { + i40iw_free_pble(iwdev->pble_rsrc, palloc); + iwpbl->pbl_allocated = false; + } + return err; } @@ -1642,8 +1768,9 @@ static int i40iw_hwreg_mr(struct i40iw_device *iwdev, stag_info->access_rights = access; stag_info->pd_id = iwpd->sc_pd.pd_id; stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED; + stag_info->page_size = iwmr->page_size; - if (iwmr->page_cnt > 1) { + if (iwpbl->pbl_allocated) { if (palloc->level == I40IW_LEVEL_1) { stag_info->first_pm_pbl_index = palloc->level1.idx; stag_info->chunk_size = 1; @@ -1699,6 +1826,11 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, bool use_pbles = false; unsigned long flags; int err = -ENOSYS; + int ret; + int pg_shift; + + if (iwdev->closing) + return ERR_PTR(-ENODEV); if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); @@ -1723,9 +1855,17 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; ucontext = to_ucontext(pd->uobject->context); - region_length = region->length + (start & 0xfff); - pbl_depth = region_length >> 12; - pbl_depth += (region_length & (4096 - 1)) ? 1 : 0; + + iwmr->page_size = region->page_size; + iwmr->page_msk = PAGE_MASK; + + if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM)) + i40iw_set_hugetlb_values(start, iwmr); + + region_length = region->length + (start & (iwmr->page_size - 1)); + pg_shift = ffs(iwmr->page_size) - 1; + pbl_depth = region_length >> pg_shift; + pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0; iwmr->length = region->length; iwpbl->user_base = virt; @@ -1755,13 +1895,21 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: + use_pbles = (iwmr->page_cnt != 1); access = I40IW_ACCESS_FLAGS_LOCALREAD; - use_pbles = (iwmr->page_cnt != 1); err = i40iw_setup_pbles(iwdev, iwmr, use_pbles); if (err) goto error; + if (use_pbles) { + ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size); + if (ret) { + i40iw_free_pble(iwdev->pble_rsrc, palloc); + iwpbl->pbl_allocated = false; + } + } + access |= i40iw_get_user_access(acc); stag = i40iw_create_stag(iwdev); if (!stag) { @@ -1778,6 +1926,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, i40iw_free_stag(iwdev, stag); goto error; } + break; default: goto error; @@ -1789,7 +1938,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, return &iwmr->ibmr; error: - if (palloc->level != I40IW_LEVEL_0) + if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); ib_umem_release(region); kfree(iwmr); @@ -2142,7 +2291,6 @@ static int i40iw_post_send(struct ib_qp *ibqp, case IB_WR_REG_MR: { struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); - int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); int flags = reg_wr(ib_wr)->access; struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; @@ -2153,6 +2301,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.access_rights |= i40iw_get_user_access(flags); info.stag_key = reg_wr(ib_wr)->key & 0xff; info.stag_idx = reg_wr(ib_wr)->key >> 8; + info.page_size = reg_wr(ib_wr)->mr->page_size; info.wr_id = ib_wr->wr_id; info.addr_type = I40IW_ADDR_TYPE_VA_BASED; @@ -2166,9 +2315,6 @@ static int i40iw_post_send(struct ib_qp *ibqp, if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR) info.chunk_size = 1; - if (page_shift == 21) - info.page_size = 1; /* 2M page */ - ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); if (ret) err = -ENOMEM; @@ -2487,21 +2633,17 @@ static int i40iw_get_hw_stats(struct ib_device *ibdev, { struct i40iw_device *iwdev = to_iwdev(ibdev); struct i40iw_sc_dev *dev = &iwdev->sc_dev; - struct i40iw_dev_pestat *devstat = &dev->dev_pestat; + struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat; struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - unsigned long flags; if (dev->is_pf) { - spin_lock_irqsave(&devstat->stats_lock, flags); - devstat->ops.iw_hw_stat_read_all(devstat, - &devstat->hw_stats); - spin_unlock_irqrestore(&devstat->stats_lock, flags); + i40iw_hw_stats_read_all(devstat, &devstat->hw_stats); } else { if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) return -ENOSYS; } - memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats)); return stats->num_counters; } @@ -2562,7 +2704,9 @@ static int i40iw_query_pkey(struct ib_device *ibdev, * @ah_attr: address handle attributes */ static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr) + struct ib_ah_attr *attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } @@ -2621,7 +2765,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; - iwibdev->ibdev.num_comp_vectors = 1; + iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dma_device = &pcidev->dev; iwibdev->ibdev.dev.parent = &pcidev->dev; iwibdev->ibdev.query_port = i40iw_query_port; @@ -2654,7 +2798,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); - i40iw_pr_err("iwcm == NULL\n"); return NULL; } @@ -2719,6 +2862,9 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev) i40iw_unregister_rdma_device(iwibdev); kfree(iwibdev->ibdev.iwcm); iwibdev->ibdev.iwcm = NULL; + wait_event_timeout(iwibdev->iwdev->close_wq, + !atomic64_read(&iwibdev->iwdev->use_count), + I40IW_EVENT_TIMEOUT); ib_dealloc_device(&iwibdev->ibdev); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 0069be8a5a38..6549c939500f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -92,6 +92,8 @@ struct i40iw_mr { struct ib_umem *region; u16 type; u32 page_cnt; + u32 page_size; + u64 page_msk; u32 npages; u32 stag; u64 length; diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 3041003c94d2..f4d13683a403 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -403,6 +403,19 @@ del_out: } /** + * i40iw_vf_init_pestat - Initialize stats for VF + * @devL pointer to the VF Device + * @stats: Statistics structure pointer + * @index: Stats index + */ +static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index) +{ + stats->hw = dev->hw; + i40iw_hw_stats_init(stats, (u8)index, false); + spin_lock_init(&stats->lock); +} + +/** * i40iw_vchnl_recv_pf - Receive PF virtual channel messages * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message @@ -421,9 +434,8 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT; struct i40iw_virt_mem vf_dev_mem; struct i40iw_virtchnl_work_info work_info; - struct i40iw_dev_pestat *devstat; + struct i40iw_vsi_pestat *stats; enum i40iw_status_code ret_code; - unsigned long flags; if (!dev || !msg || !len) return I40IW_ERR_PARAM; @@ -496,14 +508,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, i40iw_debug(dev, I40IW_DEBUG_VIRT, "VF%u error CQP HMC Function operation.\n", vf_id); - ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat); - if (ret_code) - i40iw_debug(dev, I40IW_DEBUG_VIRT, - "VF%u - i40iw_device_init_pestat failed\n", - vf_id); - vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat, - (u8)vf_dev->pmf_index, - dev->hw, false); + i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index); vf_dev->stats_initialized = true; } else { if (vf_dev) { @@ -534,12 +539,10 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, case I40IW_VCHNL_OP_GET_STATS: if (!vf_dev) return I40IW_ERR_BAD_PTR; - devstat = &vf_dev->dev_pestat; - spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); - devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); - spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); + stats = &vf_dev->pestat; + i40iw_hw_stats_read_all(stats, &stats->hw_stats); vf_dev->msg_count--; - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats); break; default: i40iw_debug(dev, I40IW_DEBUG_VIRT, diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index b9bf0759f10a..077c33d2dc75 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -114,7 +114,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. */ @@ -122,12 +124,14 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx4_ib_ah *ah; struct ib_ah *ret; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 5e9939045852..06020c54db20 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -755,10 +755,8 @@ static void alias_guid_work(struct work_struct *work) struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); rec = kzalloc(sizeof *rec, GFP_KERNEL); - if (!rec) { - pr_err("alias_guid_work: No Memory\n"); + if (!rec) return; - } pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 39a488889fc7..d64845335e87 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -247,10 +247,8 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL); - if (!ent) { - mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n"); + if (!ent) return ERR_PTR(-ENOMEM); - } ent->sl_cm_id = sl_cm_id; ent->slave_id = slave_id; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1672907ff219..db564ccc0f92 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -39,6 +39,8 @@ #include <linux/mlx4/cmd.h> #include <linux/gfp.h> #include <rdma/ib_pma.h> +#include <linux/ip.h> +#include <net/ipv6.h> #include <linux/mlx4/driver.h> #include "mlx4_ib.h" @@ -480,6 +482,23 @@ static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, return -EINVAL; } +static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, + union ib_gid *dgid) +{ + int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh); + enum rdma_network_type net_type; + + if (version == 4) + net_type = RDMA_NETWORK_IPV4; + else if (version == 6) + net_type = RDMA_NETWORK_IPV6; + else + return -EINVAL; + + return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + sgid, dgid); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -538,7 +557,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid)) + return -EINVAL; attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -651,6 +673,11 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, is_eth = 1; if (is_eth) { + union ib_gid dgid; + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &dgid)) + return -EINVAL; if (!(wc->wc_flags & IB_WC_GRH)) { mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); return -EINVAL; @@ -659,10 +686,10 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } - err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave); if (err && mlx4_is_mf_bonded(dev->dev)) { other_port = (port == 1) ? 2 : 1; - err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave); if (!err) { port = other_port; pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n", @@ -702,10 +729,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; + if (grh->dgid.global.interface_id == + cpu_to_be64(IB_SA_WELL_KNOWN_GUID) && + grh->dgid.global.subnet_prefix == cpu_to_be64( + atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) { + slave = 0; + } else { + slave = mlx4_ib_find_real_gid(ibdev, port, + grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } } } /* Class-specific handling */ @@ -1102,10 +1137,8 @@ static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) { - mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); + if (!in_mad || !out_mad) goto out; - } guid_tbl_blk_num *= 4; @@ -1916,11 +1949,8 @@ static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, *ret_ctx = NULL; ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); - if (!ctx) { - pr_err("failed allocating pv resource context " - "for port %d, slave %d\n", port, slave); + if (!ctx) return -ENOMEM; - } ctx->ib_dev = &dev->ib_dev; ctx->port = port; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b597e8227591..c8413fc120e6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -430,7 +430,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + int err; int have_ib_ports; struct mlx4_uverbs_ex_query_device cmd; struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; @@ -455,6 +455,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(resp.response_length); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + err = -ENOMEM; if (!in_mad || !out_mad) goto out; @@ -547,6 +548,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; + props->max_ah = INT_MAX; if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); @@ -697,9 +699,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; @@ -2814,20 +2818,22 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * sizeof(long), GFP_KERNEL); - if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->persist->pdev->dev, - "bit map alloc failed\n"); + if (!ibdev->ib_uc_qpns_bitmap) goto err_steer_qp_release; - } - bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); - - err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( - dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_base + - ibdev->steer_qpn_count - 1); - if (err) - goto err_steer_free_bitmap; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) { + bitmap_zero(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } else { + bitmap_fill(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + } } for (j = 1; j <= ibdev->dev->caps.num_ports; j++) @@ -3055,15 +3061,12 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); - if (!dm) { - pr_err("failed to allocate memory for tunneling qp update\n"); + if (!dm) return; - } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { - pr_err("failed to allocate memory for tunneling qp update work struct\n"); while (--i >= 0) kfree(dm[i]); goto out; @@ -3223,8 +3226,6 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, ew->port = port; ew->ib_dev = ibdev; queue_work(wq, &ew->work); - } else { - pr_err("failed to allocate memory for sl2vl update work\n"); } } @@ -3284,10 +3285,8 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); - if (!ew) { - pr_err("failed to allocate memory for events work\n"); + if (!ew) break; - } INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index a21d37f02f35..e010fe459e67 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1142,7 +1142,6 @@ void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) work = kmalloc(sizeof *work, GFP_KERNEL); if (!work) { ctx->flushing = 0; - mcg_warn("failed allocating work for cleanup\n"); return; } @@ -1202,10 +1201,8 @@ static int push_deleteing_req(struct mcast_group *group, int slave) return 0; req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) { - mcg_warn_group(group, "failed allocation - may leave stall groups\n"); + if (!req) return -ENOMEM; - } if (!list_empty(&group->func[slave].pending)) { pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 35141f451e5c..7f3d976d81ed 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -742,7 +742,8 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 570bc866b1d6..c068add8838b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, int qpn; int err; struct ib_qp_cap backup_cap; - struct mlx4_ib_sqp *sqp; + struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; @@ -933,7 +933,9 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!*caller_qp) + if (sqp) + kfree(sqp); + else if (!*caller_qp) kfree(qp); return err; } @@ -1280,7 +1282,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - if (dev->qp1_proxy[mqp->port - 1] == mqp) { + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI && + dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); @@ -1764,14 +1767,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; - struct ib_gid_attr gid_attr; + struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; - if (is_eth) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 745efa4cfc71..d090e96f6f01 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -64,7 +64,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx5_ib_ah *ah; struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -75,6 +77,27 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (ll == IB_LINK_LAYER_ETHERNET && udata) { + int err; + struct mlx5_ib_create_ah_resp resp = {}; + u32 min_resp_len = offsetof(typeof(resp), dmac) + + sizeof(resp.dmac); + + if (udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + resp.response_length = min_resp_len; + + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + + memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + return ERR_PTR(err); + } + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fcd04b881ec1..b3ef47c3ab73 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -731,7 +731,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { - struct mlx5_ib_create_cq ucmd; + struct mlx5_ib_create_cq ucmd = {}; size_t ucmdlen; int page_shift; __be64 *pas; @@ -770,7 +770,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); @@ -792,8 +792,36 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->uuari.uars[0].index; + if (ucmd.cqe_comp_en == 1) { + if (unlikely((*cqe_size != 64) || + !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", + *cqe_size); + goto err_cqb; + } + + if (unlikely(!ucmd.cqe_comp_res_format || + !(ucmd.cqe_comp_res_format < + MLX5_IB_CQE_RES_RESERVED) || + (ucmd.cqe_comp_res_format & + (ucmd.cqe_comp_res_format - 1)))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", + ucmd.cqe_comp_res_format); + goto err_cqb; + } + + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + MLX5_SET(cqc, cqc, mini_cqe_res_format, + ilog2(ucmd.cqe_comp_res_format)); + } + return 0; +err_cqb: + kfree(cqb); + err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); @@ -1124,7 +1152,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, npas, NULL); cq->resize_umem = umem; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2be65ddf56ba..d566f6738833 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -127,7 +127,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { - struct ib_event ibev = {0}; + struct ib_event ibev = { }; ibev.device = &ibdev->ib_dev; ibev.event = (event == NETDEV_UP) ? @@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; + int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); @@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); + max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); @@ -643,6 +645,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; @@ -669,6 +672,40 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } + if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, + uhw->outlen)) { + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + resp.response_length += + sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); + } + + if (field_avail(typeof(resp), reserved, uhw->outlen)) + resp.response_length += sizeof(resp.reserved); + + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + resp.cqe_comp_caps.max_num = + MLX5_CAP_GEN(dev->mdev, cqe_compression) ? + MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; + resp.cqe_comp_caps.supported_format = + MLX5_IB_CQE_RES_FORMAT_HASH | + MLX5_IB_CQE_RES_FORMAT_CSUM; + resp.response_length += sizeof(resp.cqe_comp_caps); + } + + if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) { + if (MLX5_CAP_QOS(mdev, packet_pacing) && + MLX5_CAP_GEN(mdev, qos)) { + resp.packet_pacing_caps.qp_rate_limit_max = + MLX5_CAP_QOS(mdev, packet_pacing_max_rate); + resp.packet_pacing_caps.qp_rate_limit_min = + MLX5_CAP_QOS(mdev, packet_pacing_min_rate); + resp.packet_pacing_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + } + resp.response_length += sizeof(resp.packet_pacing_caps); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@ -1093,7 +1130,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.cqe_version); if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; resp.response_length += sizeof(resp.cmds_supp_uhw); } @@ -1502,6 +1540,22 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } +static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) { MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); @@ -1515,6 +1569,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -1527,155 +1582,164 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) static int parse_flow_attr(u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec) { - void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *headers_c; + void *headers_v; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + } - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) return -ENOTSUPP; - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), ib_spec->eth.val.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), ib_spec->eth.val.src_mac); if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_cfi, ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, ntohs(ib_spec->eth.val.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, ntohs(ib_spec->eth.val.vlan_tag) >> 13); } - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.src_ip, sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.src_ip, sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.dst_ip, sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); - set_tos(outer_headers_c, outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.src_ip, sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.src_ip, sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.dst_ip, sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); - set_tos(outer_headers_c, outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv6.mask.next_hdr, ib_spec->ipv6.val.next_hdr); - MLX5_SET(fte_match_set_misc, misc_params_c, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.mask.flow_label)); - MLX5_SET(fte_match_set_misc, misc_params_v, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.val.flow_label)); + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: @@ -1683,21 +1747,31 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -ENOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; default: return -EINVAL; } @@ -2721,6 +2795,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); int err; err = mlx5_ib_query_port(ibdev, port_num, &attr); @@ -2730,7 +2806,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev); - immutable->max_mad_size = IB_MGMT_MAD_SIZE; + if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) + immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } @@ -2744,7 +2821,7 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } -static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) +static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, @@ -2773,7 +2850,7 @@ err_destroy_vport_lag: return err; } -static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -2785,7 +2862,21 @@ static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) } } -static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) +static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) +{ + int err; + + dev->roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce.nb); + if (err) { + dev->roce.nb.notifier_call = NULL; + return err; + } + + return 0; +} + +static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) { if (dev->roce.nb.notifier_call) { unregister_netdevice_notifier(&dev->roce.nb); @@ -2793,39 +2884,40 @@ static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) } } -static int mlx5_enable_roce(struct mlx5_ib_dev *dev) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - dev->roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce.nb); - if (err) { - dev->roce.nb.notifier_call = NULL; + err = mlx5_add_netdev_notifier(dev); + if (err) return err; - } - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - goto err_unregister_netdevice_notifier; + if (MLX5_CAP_GEN(dev->mdev, roce)) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + goto err_unregister_netdevice_notifier; + } - err = mlx5_roce_lag_init(dev); + err = mlx5_eth_lag_init(dev); if (err) goto err_disable_roce; return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); return err; } -static void mlx5_disable_roce(struct mlx5_ib_dev *dev) +static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { - mlx5_roce_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_eth_lag_cleanup(dev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@ -2947,9 +3039,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) - return NULL; - printk_once(KERN_INFO "%s", mlx5_version); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); @@ -2995,6 +3084,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | @@ -3017,7 +3108,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; @@ -3128,14 +3220,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_enable_roce(dev); + err = mlx5_enable_eth(dev); if (err) goto err_free_port; } err = create_dev_resources(&dev->devr); if (err) - goto err_disable_roce; + goto err_disable_eth; err = mlx5_ib_odp_init_one(dev); if (err) @@ -3179,10 +3271,10 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); -err_disable_roce: +err_disable_eth: if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_roce(dev); - mlx5_remove_roce_notifier(dev); + mlx5_disable_eth(dev); + mlx5_remove_netdev_notifier(dev); } err_free_port: @@ -3199,14 +3291,14 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) - mlx5_disable_roce(dev); + mlx5_disable_eth(dev); kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 996b54e366b0..6851357c16f4 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -37,12 +37,15 @@ /* @umem: umem object to scan * @addr: ib virtual address requested by the user + * @max_page_shift: high limit for page_shift - 0 means no limit * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region * @ncont: number of compund pages * @order: log2 of the number of compound pages */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order) { unsigned long tmp; @@ -72,6 +75,8 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); + if (max_page_shift) + m = min_t(unsigned long, max_page_shift - page_shift, m); skip = 1 << m; mask = skip - 1; i = 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 854748b61212..6c6057eb60ea 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -63,6 +63,8 @@ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) +#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) + enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, @@ -387,6 +389,7 @@ struct mlx5_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + u32 rate_limit; }; struct mlx5_ib_cq_buf { @@ -418,7 +421,7 @@ struct mlx5_umr_wr { struct ib_pd *pd; unsigned int page_shift; unsigned int npages; - u32 length; + u64 length; int access_flags; u32 mkey; }; @@ -739,7 +742,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, @@ -825,7 +829,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4e9012463c37..8f608debe141 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -628,7 +628,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + (mlx5_core_is_pf(dev->mdev))) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; @@ -646,6 +647,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } +static void wait_for_async_commands(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int total = 0; + int i; + int j; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + for (j = 0 ; j < 1000; j++) { + if (!ent->pending) + break; + msleep(50); + } + } + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + total += ent->pending; + } + + if (total) + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); + else + mlx5_ib_warn(dev, "done with all pending requests\n"); +} + int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -659,6 +687,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); + wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; @@ -816,29 +845,34 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } -static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, int *npages, - int *page_shift, int *ncont, int *order) +static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, struct ib_umem **umem, + int *npages, int *page_shift, int *ncont, + int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - if (IS_ERR(umem)) { + int err; + + *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + err = PTR_ERR_OR_ZERO(*umem); + if (err < 0) { mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; + return err; } - mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(umem); - return ERR_PTR(-EINVAL); + ib_umem_release(*umem); + return -EINVAL; } mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); - return umem; + return 0; } static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1164,11 +1198,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = mr_umem_get(pd, start, length, access_flags, &npages, + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (IS_ERR(umem)) - return (void *)umem; + if (err < 0) + return ERR_PTR(err); if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1345,10 +1379,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); - mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, - &page_shift, &ncont, &order); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); + err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, + &npages, &page_shift, &ncont, &order); + if (err < 0) { mr->umem = NULL; return err; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d1e921816bfe..a1b3125f0a6e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,12 +77,14 @@ struct mlx5_wqe_eth_pad { enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, + MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, }; struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ + u32 rate_limit; u8 rq_q_ctr_id; }; @@ -351,6 +353,29 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } +static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) +{ + int max_sge; + + if (attr->qp_type == IB_QPT_RC) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else if (attr->qp_type == IB_QPT_XRC_INI) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_xrc_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else + max_sge = (wqe_size - sq_overhead(attr)) / + sizeof(struct mlx5_wqe_data_seg); + + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / + sizeof(struct mlx5_wqe_data_seg)); +} + static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { @@ -381,13 +406,18 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { - mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n", + attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB, qp->sq.wqe_cnt, 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); - qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_gs = get_send_sge(attr, wqe_size); + if (qp->sq.max_gs < attr->cap.max_send_sge) + return -ENOMEM; + + attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; @@ -647,7 +677,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -700,7 +730,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); @@ -2442,8 +2472,14 @@ out: } static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, int new_state) + struct mlx5_ib_sq *sq, + int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { + struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; + u32 old_rate = ibqp->rate_limit; + u32 new_rate = old_rate; + u16 rl_index = 0; void *in; void *sqc; int inlen; @@ -2459,10 +2495,44 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(sqc, sqc, state, new_state); + if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) { + if (new_state != MLX5_SQC_STATE_RDY) + pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", + __func__); + else + new_rate = raw_qp_param->rate_limit; + } + + if (old_rate != new_rate) { + if (new_rate) { + err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (err) { + pr_err("Failed configuring rate %u: %d\n", + new_rate, err); + goto out; + } + } + + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } + err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); - if (err) + if (err) { + /* Remove new rate from table if failed */ + if (new_rate && + old_rate != new_rate) + mlx5_rl_remove_rate(dev, new_rate); goto out; + } + + /* Only remove the old rate after new rate was set */ + if ((old_rate && + (old_rate != new_rate)) || + (new_state != MLX5_SQC_STATE_RDY)) + mlx5_rl_remove_rate(dev, old_rate); + ibqp->rate_limit = new_rate; sq->state = new_state; out: @@ -2477,6 +2547,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + int modify_rq = !!qp->rq.wqe_cnt; + int modify_sq = !!qp->sq.wqe_cnt; int rq_state; int sq_state; int err; @@ -2494,10 +2566,18 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq_state = MLX5_RQC_STATE_RST; sq_state = MLX5_SQC_STATE_RST; break; - case MLX5_CMD_OP_INIT2INIT_QP: - case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: + if (raw_qp_param->set_mask == + MLX5_RAW_QP_RATE_LIMIT) { + modify_rq = 0; + sq_state = sq->state; + } else { + return raw_qp_param->set_mask ? -EINVAL : 0; + } + break; + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: if (raw_qp_param->set_mask) return -EINVAL; else @@ -2507,13 +2587,13 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EINVAL; } - if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); + if (modify_rq) { + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } - if (qp->sq.wqe_cnt) { + if (modify_sq) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity); @@ -2521,7 +2601,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); } return 0; @@ -2577,7 +2657,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; - int sqd_event; int mlx5_st; int err; u16 op; @@ -2724,12 +2803,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && - attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) - sqd_event = 1; - else - sqd_event = 0; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; @@ -2776,6 +2849,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + + if (attr_mask & IB_QP_RATE_LIMIT) { + raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; + } + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, @@ -3067,10 +3146,10 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) { memset(umr, 0, sizeof(*umr)); umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; + umr->flags = MLX5_UMR_INLINE; } -static __be64 get_umr_reg_mr_mask(void) +static __be64 get_umr_reg_mr_mask(int atomic) { u64 result; @@ -3083,9 +3162,11 @@ static __be64 get_umr_reg_mr_mask(void) MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -3146,7 +3227,7 @@ static __be64 get_umr_update_pd_mask(void) } static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr) + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3171,7 +3252,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) umr->mkey_mask |= get_umr_update_pd_mask(); if (!umr->mkey_mask) - umr->mkey_mask = get_umr_reg_mr_mask(); + umr->mkey_mask = get_umr_reg_mr_mask(atomic); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -4024,7 +4105,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr); + set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3857dbd9c956..6f4397ee1ed6 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -118,7 +118,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); @@ -203,8 +203,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { - mlx5_ib_dbg(dev, "kmalloc failed %lu\n", - (unsigned long)(srq->msrq.max * sizeof(u64))); err = -ENOMEM; goto err_in; } @@ -282,6 +280,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + in.type = init_attr->srq_type; if (pd->uobject) err = create_srq_user(pd, srq, &in, udata, buf_size); @@ -294,7 +293,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in.type = init_attr->srq_type; in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index bcac294042f5..c9f0f364f484 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -186,8 +186,8 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { - ah->av = pci_pool_alloc(dev->av_table.pool, - GFP_ATOMIC, &ah->avdma); + ah->av = pci_pool_zalloc(dev->av_table.pool, + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -196,8 +196,6 @@ on_hca_fail: ah->key = pd->ntmr.ibmr.lkey; - memset(av, 0, MTHCA_AV_SIZE); - av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); av->g_slid = ah_attr->src_path_bits; av->dlid = cpu_to_be16(ah_attr->dlid); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 358930a41e36..d31708742ba5 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -410,7 +410,9 @@ static int mthca_dealloc_pd(struct ib_pd *pd) } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { int err; struct mthca_ah *ah; diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 6727af27c017..2a6979e4ae1c 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -96,8 +96,6 @@ int mthca_reset(struct mthca_dev *mdev) hca_header = kmalloc(256, GFP_KERNEL); if (!hca_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " - "PCI header, aborting.\n"); goto put_dev; } @@ -119,8 +117,6 @@ int mthca_reset(struct mthca_dev *mdev) bridge_header = kmalloc(256, GFP_KERNEL); if (!bridge_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " - "bridge PCI header, aborting.\n"); goto free_hca; } diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 2baa45a8e401..5b9601014f0c 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -515,7 +515,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) /* Allocate hardware structure */ nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); if (!nesdev) { - printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev)); ret = -ENOMEM; goto bail2; } diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 57db9b332f44..8e703479e7ce 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2282,10 +2282,8 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, if (!listener) { /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); - if (!listener) { - nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n"); + if (!listener) return NULL; - } listener->loc_addr = cm_info->loc_addr; listener->loc_port = cm_info->loc_port; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index a1c6481d8038..19acd13c6cb1 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -351,9 +351,8 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { /* allocate a new adapter struct */ nesadapter = kzalloc(adapter_size, GFP_KERNEL); - if (nesadapter == NULL) { + if (!nesadapter) return NULL; - } nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); @@ -1007,8 +1006,7 @@ int nes_init_cqp(struct nes_device *nesdev) /* Allocate a twice the number of CQP requests as the SQ size */ nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); - if (nesdev->nes_cqp_requests == NULL) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n"); + if (!nesdev->nes_cqp_requests) { pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, nesdev->cqp.sq_pbase); return -ENOMEM; diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 416645259b0f..33624f17c347 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -320,8 +320,7 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, /* Found one */ fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); - if (fpdu_info == NULL) { - nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n"); + if (!fpdu_info) { rc = -ENOMEM; goto out; } @@ -729,8 +728,7 @@ static int nes_change_quad_hash(struct nes_device *nesdev, } qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); - if (qh_chg == NULL) { - nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + if (!qh_chg) { ret = -ENOMEM; goto chg_qh_err; } @@ -880,10 +878,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct /* Allocate space the all mgt QPs once */ mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL); - if (mgtvnic == NULL) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n"); + if (!mgtvnic) return -ENOMEM; - } /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ /* We are not sending from this NIC so sq is not allocated */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 7f8597d6738b..5921ea3d50ae 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -662,10 +662,14 @@ tso_sq_no_longer_full: nesnic->sq_head &= nesnic->sq_size-1; } } else { - nesvnic->linearized_skbs++; hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; - skb_linearize(skb); + if (skb_linearize(skb)) { + nesvnic->tx_sw_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; + } + nesvnic->linearized_skbs++; skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); if (!nes_nic_send(skb, netdev)) @@ -1461,7 +1465,8 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, /** * nes_netdev_get_settings */ -static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +static int nes_netdev_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1470,54 +1475,59 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd u8 phy_type = nesadapter->phy_type[mac_index]; u8 phy_index = nesadapter->phy_index[mac_index]; u16 phy_data; + u32 supported, advertising; - et_cmd->duplex = DUPLEX_FULL; - et_cmd->port = PORT_MII; - et_cmd->maxtxpkt = 511; - et_cmd->maxrxpkt = 511; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; if (nesadapter->OneG_Mode) { - ethtool_cmd_speed_set(et_cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; if (phy_type == NES_PHY_TYPE_PUMA_1G) { - et_cmd->supported = SUPPORTED_1000baseT_Full; - et_cmd->advertising = ADVERTISED_1000baseT_Full; - et_cmd->autoneg = AUTONEG_DISABLE; - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->phy_address = mac_index; + supported = SUPPORTED_1000baseT_Full; + advertising = ADVERTISED_1000baseT_Full; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.phy_address = mac_index; } else { unsigned long flags; - et_cmd->supported = SUPPORTED_1000baseT_Full - | SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full - | ADVERTISED_Autoneg; + + supported = SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg; + advertising = ADVERTISED_1000baseT_Full + | ADVERTISED_Autoneg; spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); spin_unlock_irqrestore(&nesadapter->phy_lock, flags); if (phy_data & 0x1000) - et_cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - et_cmd->autoneg = AUTONEG_DISABLE; - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = phy_index; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.phy_address = phy_index; } + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); return 0; } if ((phy_type == NES_PHY_TYPE_ARGUS) || (phy_type == NES_PHY_TYPE_SFP_D) || (phy_type == NES_PHY_TYPE_KR)) { - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; - et_cmd->advertising = ADVERTISED_FIBRE; - et_cmd->phy_address = phy_index; + cmd->base.port = PORT_FIBRE; + supported = SUPPORTED_FIBRE; + advertising = ADVERTISED_FIBRE; + cmd->base.phy_address = phy_index; } else { - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; - et_cmd->advertising = ADVERTISED_10000baseT_Full; - et_cmd->phy_address = mac_index; + supported = SUPPORTED_10000baseT_Full; + advertising = ADVERTISED_10000baseT_Full; + cmd->base.phy_address = mac_index; } - ethtool_cmd_speed_set(et_cmd, SPEED_10000); - et_cmd->autoneg = AUTONEG_DISABLE; + cmd->base.speed = SPEED_10000; + cmd->base.autoneg = AUTONEG_DISABLE; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } @@ -1525,7 +1535,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd /** * nes_netdev_set_settings */ -static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +static int +nes_netdev_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1539,7 +1551,7 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - if (et_cmd->autoneg) { + if (cmd->base.autoneg) { /* Turn on Full duplex, Autoneg, and restart autonegotiation */ phy_data |= 0x1300; } else { @@ -1556,8 +1568,6 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd static const struct ethtool_ops nes_ethtool_ops = { .get_link = ethtool_op_get_link, - .get_settings = nes_netdev_get_settings, - .set_settings = nes_netdev_set_settings, .get_strings = nes_netdev_get_strings, .get_sset_count = nes_netdev_get_sset_count, .get_ethtool_stats = nes_netdev_get_ethtool_stats, @@ -1566,6 +1576,8 @@ static const struct ethtool_ops nes_ethtool_ops = { .set_coalesce = nes_netdev_set_coalesce, .get_pauseparam = nes_netdev_get_pauseparam, .set_pauseparam = nes_netdev_set_pauseparam, + .get_link_ksettings = nes_netdev_get_link_ksettings, + .set_link_ksettings = nes_netdev_set_link_ksettings, }; static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index bd69125731c1..aff9fb14768b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -771,7 +771,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** * nes_create_ah */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } @@ -1075,7 +1076,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL); if (!mem) { nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - nes_debug(NES_DBG_QP, "Unable to allocate QP\n"); return ERR_PTR(-ENOMEM); } u64nesqp = (unsigned long)mem; @@ -1475,7 +1475,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); if (!nescq) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n"); return ERR_PTR(-ENOMEM); } @@ -2408,7 +2407,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); if (!nespbl) { - nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); ib_umem_release(region); return ERR_PTR(-ENOMEM); } @@ -2416,7 +2414,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!nesmr) { ib_umem_release(region); kfree(nespbl); - nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n"); return ERR_PTR(-ENOMEM); } nesmr->region = region; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 797362a297b2..14d33b0f3950 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -154,7 +154,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { u32 *ahid_addr; int status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 3856dd4c7e3d..0704a24b17c8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -50,7 +50,9 @@ enum { OCRDMA_AH_L3_TYPE_MASK = 0x03, OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); + +struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *, + struct ib_udata *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 67fc0b6857e1..9a305201545e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1596,10 +1596,9 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev) dev->pd_mgr = kzalloc(sizeof(struct ocrdma_pd_resource_mgr), GFP_KERNEL); - if (!dev->pd_mgr) { - pr_err("%s(%d)Memory allocation failure.\n", __func__, dev->id); + if (!dev->pd_mgr) return; - } + status = ocrdma_mbx_alloc_pd_range(dev); if (status) { pr_err("%s(%d) Unable to initialize PD pool, using default.\n", @@ -1642,7 +1641,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size, static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) { int i; - int status = 0; + int status = -ENOMEM; int max_ah; struct ocrdma_create_ah_tbl *cmd; struct ocrdma_create_ah_tbl_rsp *rsp; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 8bef09a8c49f..f8e4b0a6486f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -84,10 +84,8 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev) /* Alloc debugfs mem */ mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); - if (!mem->debugfs_mem) { - pr_err("%s: stats debugfs mem allocation failed\n", __func__); + if (!mem->debugfs_mem) return false; - } return true; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a61514296767..302fb05e6e6f 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -511,8 +511,10 @@ int qedr_dealloc_pd(struct ib_pd *ibpd) struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); - if (!pd) + if (!pd) { pr_err("Invalid PD received in dealloc_pd\n"); + return -EINVAL; + } DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); @@ -1477,6 +1479,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; struct qedr_qp *qp; + struct ib_qp *ibqp; int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", @@ -1486,13 +1489,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, if (rc) return ERR_PTR(rc); + if (attrs->srq) + return ERR_PTR(-EINVAL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - if (attrs->srq) - return ERR_PTR(-EINVAL); - DP_DEBUG(dev, QEDR_MSG_QP, "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", get_qedr_cq(attrs->send_cq), @@ -1508,7 +1511,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, "create qp: unexpected udata when creating GSI QP\n"); goto err0; } - return qedr_create_gsi_qp(dev, attrs, qp); + ibqp = qedr_create_gsi_qp(dev, attrs, qp); + if (IS_ERR(ibqp)) + kfree(qp); + return ibqp; } memset(&in_params, 0, sizeof(in_params)); @@ -2094,7 +2100,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp) return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { struct qedr_ah *ah; @@ -2413,8 +2420,7 @@ static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) */ pbl = list_first_entry(&info->inuse_pbl_list, struct qedr_pbl, list_entry); - list_del(&pbl->list_entry); - list_add_tail(&pbl->list_entry, &info->free_pbl_list); + list_move_tail(&pbl->list_entry, &info->free_pbl_list); info->completed_handled++; } } @@ -2981,11 +2987,6 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return -EINVAL; } - if (!wr) { - DP_ERR(dev, "Got an empty post send.\n"); - return -EINVAL; - } - while (wr) { rc = __qedr_post_send(ibqp, wr, bad_wr); if (rc) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index a9b5e67bb81e..070677ca4d19 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,8 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); int qedr_dereg_mr(struct ib_mr *); diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 8c34b23e5bf6..775018b32b0d 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -609,8 +609,6 @@ static ssize_t qib_diagpkt_write(struct file *fp, tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, - "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -702,10 +700,8 @@ int qib_register_observer(struct qib_devdata *dd, if (!dd || !op) return -EINVAL; olp = vmalloc(sizeof(*olp)); - if (!olp) { - pr_err("vmalloc for observer failed\n"); + if (!olp) return -ENOMEM; - } spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); olp->op = op; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 728e0a030d2e..2b5982f743ef 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc( - &qp->refcount); + rvt_get_qp(qp); list_add_tail( &qp->rspwait, &rcd->qp_wait_list); diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 311ee6c3dd5e..33a2e74c8495 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -182,12 +182,8 @@ void qib_get_eeprom_info(struct qib_devdata *dd) * */ len = sizeof(struct qib_flash); buf = vmalloc(len); - if (!buf) { - qib_dev_err(dd, - "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", - len); + if (!buf) goto bail; - } /* * Use "public" eeprom read function, which does locking and diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 382466a90da7..2d1eacf1dfed 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2066,8 +2066,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + if (!ib_safe_file_access(fp)) { + pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + task_tgid_vnr(current), current->comm); return -EACCES; + } if (count < sizeof(cmd.type)) { ret = -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a3733f25280f..92399d3ffd15 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1759,9 +1759,7 @@ static void pe_boardname(struct qib_devdata *dd) } namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) @@ -2533,8 +2531,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr6120names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr6120names; s; i++) s = strchr(s + 1, '\n'); @@ -2542,8 +2538,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd) dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1; dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->portcntrs) - qib_dev_err(dd, "Failed allocation for portcounters\n"); } static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep, diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 00b2af211157..e55e31a69195 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2070,9 +2070,7 @@ static void qib_7220_boardname(struct qib_devdata *dd) namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) @@ -3179,8 +3177,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr7220names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr7220names; s; i++) s = strchr(s + 1, '\n'); @@ -3188,8 +3184,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd) dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1; dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->portcntrs) - qib_dev_err(dd, "Failed allocation for portcounters\n"); } static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep, diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ded27172320e..c4a3616062f1 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3627,9 +3627,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) namelen = strlen(n) + 1; dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (!dd->boardname) - qib_dev_err(dd, "Failed allocation for board name: %s\n", n); - else + if (dd->boardname) snprintf(dd->boardname, namelen, "%s", n); snprintf(dd->boardversion, sizeof(dd->boardversion), @@ -3656,7 +3654,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) static int qib_do_7322_reset(struct qib_devdata *dd) { u64 val; - u64 *msix_vecsave; + u64 *msix_vecsave = NULL; int i, msix_entries, ret = 1; u16 cmdval; u8 int_line, clinesz; @@ -3677,10 +3675,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) /* can be up to 512 bytes, too big for stack */ msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries * sizeof(u64), GFP_KERNEL); - if (!msix_vecsave) - qib_dev_err(dd, "No mem to save MSIx data\n"); - } else - msix_vecsave = NULL; + } /* * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector @@ -5043,8 +5038,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->cspec->cntrnamelen = 1 + s - cntr7322names; dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs * sizeof(u64), GFP_KERNEL); - if (!dd->cspec->cntrs) - qib_dev_err(dd, "Failed allocation for counters\n"); for (i = 0, s = (char *)portcntr7322names; s; i++) s = strchr(s + 1, '\n'); @@ -5053,9 +5046,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; ++i) { dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); - if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, - "Failed allocation for portcounters\n"); } } @@ -6461,7 +6451,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) sizeof(*dd->cspec->sendibchk), GFP_KERNEL); if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk || !dd->cspec->sendibchk) { - qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n"); ret = -ENOMEM; goto bail; } @@ -7338,10 +7327,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, tabsize = actual_cnt; dd->cspec->msix_entries = kzalloc(tabsize * sizeof(struct qib_msix_entry), GFP_KERNEL); - if (!dd->cspec->msix_entries) { - qib_dev_err(dd, "No memory for MSIx table\n"); + if (!dd->cspec->msix_entries) tabsize = 0; - } + for (i = 0; i < tabsize; i++) dd->cspec->msix_entries[i].msix.entry = i; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 1730aa839a47..b50240b1d5a4 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -133,11 +133,8 @@ int qib_create_ctxts(struct qib_devdata *dd) * cleanup iterates across all possible ctxts. */ dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL); - if (!dd->rcd) { - qib_dev_err(dd, - "Unable to allocate ctxtdata array, failing\n"); + if (!dd->rcd) return -ENOMEM; - } /* create (one or more) kctxt */ for (i = 0; i < dd->first_user_ctxt; ++i) { @@ -265,39 +262,23 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) * IB_CCT_ENTRIES; ppd->ccti_entries = kzalloc(size, GFP_KERNEL); - if (!ppd->ccti_entries) { - qib_dev_err(dd, - "failed to allocate congestion control table for port %d!\n", - port); + if (!ppd->ccti_entries) goto bail; - } size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); ppd->congestion_entries = kzalloc(size, GFP_KERNEL); - if (!ppd->congestion_entries) { - qib_dev_err(dd, - "failed to allocate congestion setting list for port %d!\n", - port); + if (!ppd->congestion_entries) goto bail_1; - } size = sizeof(struct cc_table_shadow); ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); - if (!ppd->ccti_entries_shadow) { - qib_dev_err(dd, - "failed to allocate shadow ccti list for port %d!\n", - port); + if (!ppd->ccti_entries_shadow) goto bail_2; - } size = sizeof(struct ib_cc_congestion_setting_attr); ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); - if (!ppd->congestion_entries_shadow) { - qib_dev_err(dd, - "failed to allocate shadow congestion setting list for port %d!\n", - port); + if (!ppd->congestion_entries_shadow) goto bail_3; - } return 0; @@ -391,18 +372,12 @@ static void init_shadow_tids(struct qib_devdata *dd) dma_addr_t *addrs; pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); - if (!pages) { - qib_dev_err(dd, - "failed to allocate shadow page * array, no expected sends!\n"); + if (!pages) goto bail; - } addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); - if (!addrs) { - qib_dev_err(dd, - "failed to allocate shadow dma handle array, no expected sends!\n"); + if (!addrs) goto bail_free; - } dd->pageshadow = pages; dd->physshadow = addrs; @@ -1026,11 +1001,8 @@ static void qib_verify_pioperf(struct qib_devdata *dd) cnt = 1024; addr = vmalloc(cnt); - if (!addr) { - qib_devinfo(dd->pcidev, - "Couldn't get memory for checking PIO perf, skipping\n"); + if (!addr) goto done; - } preempt_disable(); /* we want reasonably accurate elapsed time */ msecs = 1 + jiffies_to_msecs(jiffies); @@ -1172,9 +1144,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) sizeof(long), GFP_KERNEL); if (qib_cpulist) qib_cpulist_count = count; - else - qib_early_err(&pdev->dev, - "Could not alloc cpulist info, cpu affinity might be wrong\n"); } #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_init(&dd->verbs_dev); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 2097512e75aa..031433cb7206 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before resending, @@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct qib_ibport *ibp) { - struct ib_wc wc; - unsigned i; - /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); @@ -2112,8 +2079,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index de1bde5950f5..e54a2feeeb10 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 954f15064514..4b54c0ddd08a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -114,19 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); /* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_qib_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - -/* * System image GUID. */ __be64 ib_qib_sys_image_guid; @@ -464,7 +451,7 @@ static void mem_timer(unsigned long data) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); } @@ -477,8 +464,7 @@ static void mem_timer(unsigned long data) qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); @@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } else spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); } @@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) break; avail -= qpp->s_tx->txreq.sg_count; list_del_init(&qpp->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } @@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) qib_schedule_send(qp); } spin_unlock(&qp->s_lock); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } dd->f_wantpiobuf_intr(dd, 0); @@ -1306,8 +1290,7 @@ full: spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify qib_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 5b0248adf4ce..092d4e11a633 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -117,10 +117,10 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); res_chunk = get_qp_res_chunk(qp_grp); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get qp res with err %ld\n", PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } for (i = 0; i < res_chunk->cnt; i++) { @@ -158,10 +158,10 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); res_chunk = get_qp_res_chunk(qp_grp); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get qp res with err %ld\n", PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } for (i = 0; i < res_chunk->cnt; i++) { @@ -186,11 +186,11 @@ static int init_filter_action(struct usnic_ib_qp_grp *qp_grp, struct usnic_vnic_res_chunk *res_chunk; res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); - if (IS_ERR_OR_NULL(res_chunk)) { + if (IS_ERR(res_chunk)) { usnic_err("Unable to get %s with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), PTR_ERR(res_chunk)); - return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + return PTR_ERR(res_chunk); } uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); @@ -228,8 +228,6 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); if (IS_ERR_OR_NULL(flow)) { - usnic_err("Unable to alloc flow failed with err %ld\n", - PTR_ERR(flow)); err = flow ? PTR_ERR(flow) : -EFAULT; goto out_unreserve_port; } @@ -303,8 +301,6 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); if (IS_ERR_OR_NULL(flow)) { - usnic_err("Unable to alloc flow failed with err %ld\n", - PTR_ERR(flow)); err = flow ? PTR_ERR(flow) : -EFAULT; goto out_put_sock; } @@ -694,18 +690,14 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, } qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); - if (!qp_grp) { - usnic_err("Unable to alloc qp_grp - Out of memory\n"); + if (!qp_grp) return NULL; - } qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec, qp_grp); if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) { err = qp_grp->res_chunk_list ? PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; - usnic_err("Unable to alloc res for %d with err %d\n", - qp_grp->grp_id, err); goto out_free_qp_grp; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a5bfbba6bbac..74819a7951e2 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -87,12 +87,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.bar_len = bar->len; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ); @@ -101,12 +101,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.rq_idx[i] = chunk->res[i]->vnic_idx; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ); @@ -115,12 +115,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.wq_idx[i] = chunk->res[i]->vnic_idx; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ); - if (IS_ERR_OR_NULL(chunk)) { + if (IS_ERR(chunk)) { usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ), qp_grp->grp_id, PTR_ERR(chunk)); - return chunk ? PTR_ERR(chunk) : -ENOMEM; + return PTR_ERR(chunk); } WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ); @@ -738,7 +738,9 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { usnic_dbg("\n"); return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 0d9d2e6a14d5..0ed8e072329e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,7 +75,9 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); + int usnic_ib_destroy_ah(struct ib_ah *ah); int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c index 887510718690..e7b0030254da 100644 --- a/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -241,17 +241,12 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, return ERR_PTR(-EINVAL); ret = kzalloc(sizeof(*ret), GFP_ATOMIC); - if (!ret) { - usnic_err("Failed to allocate chunk for %s - Out of memory\n", - usnic_vnic_pci_name(vnic)); + if (!ret) return ERR_PTR(-ENOMEM); - } if (cnt > 0) { ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC); if (!ret->res) { - usnic_err("Failed to allocate resources for %s. Out of memory\n", - usnic_vnic_pci_name(vnic)); kfree(ret); return ERR_PTR(-ENOMEM); } @@ -311,8 +306,10 @@ static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic, struct usnic_vnic_res *res; cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type)); - if (cnt < 1) + if (cnt < 1) { + usnic_err("Wrong res count with cnt %d\n", cnt); return -EINVAL; + } chunk->cnt = chunk->free_cnt = cnt; chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL); @@ -384,12 +381,8 @@ static int usnic_vnic_discover_resources(struct pci_dev *pdev, res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { err = usnic_vnic_alloc_res_chunk(vnic, res_type, &vnic->chunks[res_type]); - if (err) { - usnic_err("Failed to alloc res %s with err %d\n", - usnic_vnic_res_type_to_str(res_type), - err); + if (err) goto out_clean_chunks; - } } return 0; @@ -454,11 +447,8 @@ struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev) } vnic = kzalloc(sizeof(*vnic), GFP_KERNEL); - if (!vnic) { - usnic_err("Failed to alloc vnic for %s - out of memory\n", - pci_name(pdev)); + if (!vnic) return ERR_PTR(-ENOMEM); - } spin_lock_init(&vnic->res_lock); diff --git a/drivers/infiniband/hw/vmw_pvrdma/Kconfig b/drivers/infiniband/hw/vmw_pvrdma/Kconfig new file mode 100644 index 000000000000..5a9790ac0ede --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/Kconfig @@ -0,0 +1,7 @@ +config INFINIBAND_VMWARE_PVRDMA + tristate "VMware Paravirtualized RDMA Driver" + depends on NETDEVICES && ETHERNET && PCI && INET && VMXNET3 + ---help--- + This driver provides low-level support for VMware Paravirtual + RDMA adapter. It interacts with the VMXNet3 driver to provide + Ethernet capabilities. diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile new file mode 100644 index 000000000000..0194ed19f542 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o + +vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h new file mode 100644 index 000000000000..71e1d55d69d6 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_H__ +#define __PVRDMA_H__ + +#include <linux/compiler.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_verbs.h> +#include <rdma/vmw_pvrdma-abi.h> + +#include "pvrdma_ring.h" +#include "pvrdma_dev_api.h" +#include "pvrdma_verbs.h" + +/* NOT the same as BIT_MASK(). */ +#define PVRDMA_MASK(n) ((n << 1) - 1) + +/* + * VMware PVRDMA PCI device id. + */ +#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 + +struct pvrdma_dev; + +struct pvrdma_page_dir { + dma_addr_t dir_dma; + u64 *dir; + int ntables; + u64 **tables; + u64 npages; + void **pages; +}; + +struct pvrdma_cq { + struct ib_cq ibcq; + int offset; + spinlock_t cq_lock; /* Poll lock. */ + struct pvrdma_uar_map *uar; + struct ib_umem *umem; + struct pvrdma_ring_state *ring_state; + struct pvrdma_page_dir pdir; + u32 cq_handle; + bool is_kernel; + atomic_t refcnt; + wait_queue_head_t wait; +}; + +struct pvrdma_id_table { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; /* Table lock. */ + unsigned long *table; +}; + +struct pvrdma_uar_map { + unsigned long pfn; + void __iomem *map; + int index; +}; + +struct pvrdma_uar_table { + struct pvrdma_id_table tbl; + int size; +}; + +struct pvrdma_ucontext { + struct ib_ucontext ibucontext; + struct pvrdma_dev *dev; + struct pvrdma_uar_map uar; + u64 ctx_handle; +}; + +struct pvrdma_pd { + struct ib_pd ibpd; + u32 pdn; + u32 pd_handle; + int privileged; +}; + +struct pvrdma_mr { + u32 mr_handle; + u64 iova; + u64 size; +}; + +struct pvrdma_user_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct pvrdma_mr mmr; + struct pvrdma_page_dir pdir; + u64 *pages; + u32 npages; + u32 max_pages; + u32 page_shift; +}; + +struct pvrdma_wq { + struct pvrdma_ring *ring; + spinlock_t lock; /* Work queue lock. */ + int wqe_cnt; + int wqe_size; + int max_sg; + int offset; +}; + +struct pvrdma_ah { + struct ib_ah ibah; + struct pvrdma_av av; +}; + +struct pvrdma_qp { + struct ib_qp ibqp; + u32 qp_handle; + u32 qkey; + struct pvrdma_wq sq; + struct pvrdma_wq rq; + struct ib_umem *rumem; + struct ib_umem *sumem; + struct pvrdma_page_dir pdir; + int npages; + int npages_send; + int npages_recv; + u32 flags; + u8 port; + u8 state; + bool is_kernel; + struct mutex mutex; /* QP state mutex. */ + atomic_t refcnt; + wait_queue_head_t wait; +}; + +struct pvrdma_dev { + /* PCI device-related information. */ + struct ib_device ib_dev; + struct pci_dev *pdev; + void __iomem *regs; + struct pvrdma_device_shared_region *dsr; /* Shared region pointer */ + dma_addr_t dsrbase; /* Shared region base address */ + void *cmd_slot; + void *resp_slot; + unsigned long flags; + struct list_head device_link; + + /* Locking and interrupt information. */ + spinlock_t cmd_lock; /* Command lock. */ + struct semaphore cmd_sema; + struct completion cmd_done; + struct { + enum pvrdma_intr_type type; /* Intr type */ + struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS]; + irq_handler_t handler[PVRDMA_MAX_INTERRUPTS]; + u8 enabled[PVRDMA_MAX_INTERRUPTS]; + u8 size; + } intr; + + /* RDMA-related device information. */ + union ib_gid *sgid_tbl; + struct pvrdma_ring_state *async_ring_state; + struct pvrdma_page_dir async_pdir; + struct pvrdma_ring_state *cq_ring_state; + struct pvrdma_page_dir cq_pdir; + struct pvrdma_cq **cq_tbl; + spinlock_t cq_tbl_lock; + struct pvrdma_qp **qp_tbl; + spinlock_t qp_tbl_lock; + struct pvrdma_uar_table uar_table; + struct pvrdma_uar_map driver_uar; + __be64 sys_image_guid; + spinlock_t desc_lock; /* Device modification lock. */ + u32 port_cap_mask; + struct mutex port_mutex; /* Port modification mutex. */ + bool ib_active; + atomic_t num_qps; + atomic_t num_cqs; + atomic_t num_pds; + atomic_t num_ahs; + + /* Network device information. */ + struct net_device *netdev; + struct notifier_block nb_netdev; +}; + +struct pvrdma_netdevice_work { + struct work_struct work; + struct net_device *event_netdev; + unsigned long event; +}; + +static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct pvrdma_dev, ib_dev); +} + +static inline struct +pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct pvrdma_ucontext, ibucontext); +} + +static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct pvrdma_pd, ibpd); +} + +static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct pvrdma_cq, ibcq); +} + +static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct pvrdma_user_mr, ibmr); +} + +static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct pvrdma_qp, ibqp); +} + +static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah) +{ + return container_of(ibah, struct pvrdma_ah, ibah); +} + +static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val) +{ + writel(cpu_to_le32(val), dev->regs + reg); +} + +static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg) +{ + return le32_to_cpu(readl(dev->regs + reg)); +} + +static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val) +{ + writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET); +} + +static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val) +{ + writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET); +} + +static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir, + u64 offset) +{ + return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE); +} + +static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu) +{ + return (enum pvrdma_mtu)mtu; +} + +static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu) +{ + return (enum ib_mtu)mtu; +} + +static inline enum pvrdma_port_state ib_port_state_to_pvrdma( + enum ib_port_state state) +{ + return (enum pvrdma_port_state)state; +} + +static inline enum ib_port_state pvrdma_port_state_to_ib( + enum pvrdma_port_state state) +{ + return (enum ib_port_state)state; +} + +static inline int ib_port_cap_flags_to_pvrdma(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX); +} + +static inline int pvrdma_port_cap_flags_to_ib(int flags) +{ + return flags; +} + +static inline enum pvrdma_port_width ib_port_width_to_pvrdma( + enum ib_port_width width) +{ + return (enum pvrdma_port_width)width; +} + +static inline enum ib_port_width pvrdma_port_width_to_ib( + enum pvrdma_port_width width) +{ + return (enum ib_port_width)width; +} + +static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma( + enum ib_port_speed speed) +{ + return (enum pvrdma_port_speed)speed; +} + +static inline enum ib_port_speed pvrdma_port_speed_to_ib( + enum pvrdma_port_speed speed) +{ + return (enum ib_port_speed)speed; +} + +static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) +{ + return attr_mask; +} + +static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) +{ + return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); +} + +static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma( + enum ib_mig_state state) +{ + return (enum pvrdma_mig_state)state; +} + +static inline enum ib_mig_state pvrdma_mig_state_to_ib( + enum pvrdma_mig_state state) +{ + return (enum ib_mig_state)state; +} + +static inline int ib_access_flags_to_pvrdma(int flags) +{ + return flags; +} + +static inline int pvrdma_access_flags_to_ib(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX); +} + +static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type) +{ + return (enum pvrdma_qp_type)type; +} + +static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type) +{ + return (enum ib_qp_type)type; +} + +static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state) +{ + return (enum pvrdma_qp_state)state; +} + +static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state) +{ + return (enum ib_qp_state)state; +} + +static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) +{ + return (enum pvrdma_wr_opcode)op; +} + +static inline enum ib_wc_status pvrdma_wc_status_to_ib( + enum pvrdma_wc_status status) +{ + return (enum ib_wc_status)status; +} + +static inline int pvrdma_wc_opcode_to_ib(int opcode) +{ + return opcode; +} + +static inline int pvrdma_wc_flags_to_ib(int flags) +{ + return flags; +} + +static inline int ib_send_flags_to_pvrdma(int flags) +{ + return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); +} + +void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, + const struct pvrdma_qp_cap *src); +void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, + const struct ib_qp_cap *src); +void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src); +void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src); +void pvrdma_global_route_to_ib(struct ib_global_route *dst, + const struct pvrdma_global_route *src); +void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst, + const struct ib_global_route *src); +void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst, + const struct pvrdma_ah_attr *src); +void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, + const struct ib_ah_attr *src); + +int pvrdma_uar_table_init(struct pvrdma_dev *dev); +void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev); + +int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar); +void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar); + +void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq); + +int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir, + u64 npages, bool alloc_pages); +void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir); +int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, + dma_addr_t daddr); +int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, + struct ib_umem *umem, u64 offset); +dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx); +int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir, + u64 *page_list, int num_pages); + +int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp, unsigned resp_code); + +#endif /* __PVRDMA_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c new file mode 100644 index 000000000000..4a78c537d8a1 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> + +#include "pvrdma.h" + +#define PVRDMA_CMD_TIMEOUT 10000 /* ms */ + +static inline int pvrdma_cmd_recv(struct pvrdma_dev *dev, + union pvrdma_cmd_resp *resp, + unsigned resp_code) +{ + int err; + + dev_dbg(&dev->pdev->dev, "receive response from device\n"); + + err = wait_for_completion_interruptible_timeout(&dev->cmd_done, + msecs_to_jiffies(PVRDMA_CMD_TIMEOUT)); + if (err == 0 || err == -ERESTARTSYS) { + dev_warn(&dev->pdev->dev, + "completion timeout or interrupted\n"); + return -ETIMEDOUT; + } + + spin_lock(&dev->cmd_lock); + memcpy(resp, dev->resp_slot, sizeof(*resp)); + spin_unlock(&dev->cmd_lock); + + if (resp->hdr.ack != resp_code) { + dev_warn(&dev->pdev->dev, + "unknown response %#x expected %#x\n", + resp->hdr.ack, resp_code); + return -EFAULT; + } + + return 0; +} + +int +pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *resp, unsigned resp_code) +{ + int err; + + dev_dbg(&dev->pdev->dev, "post request to device\n"); + + /* Serializiation */ + down(&dev->cmd_sema); + + BUILD_BUG_ON(sizeof(union pvrdma_cmd_req) != + sizeof(struct pvrdma_cmd_modify_qp)); + + spin_lock(&dev->cmd_lock); + memcpy(dev->cmd_slot, req, sizeof(*req)); + spin_unlock(&dev->cmd_lock); + + init_completion(&dev->cmd_done); + pvrdma_write_reg(dev, PVRDMA_REG_REQUEST, 0); + + /* Make sure the request is written before reading status. */ + mb(); + + err = pvrdma_read_reg(dev, PVRDMA_REG_ERR); + if (err == 0) { + if (resp != NULL) + err = pvrdma_cmd_recv(dev, resp, resp_code); + } else { + dev_warn(&dev->pdev->dev, + "failed to write request error reg: %d\n", err); + err = -EFAULT; + } + + up(&dev->cmd_sema); + + return err; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c new file mode 100644 index 000000000000..e429ca5b16aa --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "pvrdma.h" + +/** + * pvrdma_req_notify_cq - request notification for a completion queue + * @ibcq: the completion queue + * @notify_flags: notification flags + * + * @return: 0 for success. + */ +int pvrdma_req_notify_cq(struct ib_cq *ibcq, + enum ib_cq_notify_flags notify_flags) +{ + struct pvrdma_dev *dev = to_vdev(ibcq->device); + struct pvrdma_cq *cq = to_vcq(ibcq); + u32 val = cq->cq_handle; + + val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM; + + pvrdma_write_uar_cq(dev, val); + + return 0; +} + +/** + * pvrdma_create_cq - create completion queue + * @ibdev: the device + * @attr: completion queue attributes + * @context: user context + * @udata: user data + * + * @return: ib_cq completion queue pointer on success, + * otherwise returns negative errno. + */ +struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int entries = attr->cqe; + struct pvrdma_dev *dev = to_vdev(ibdev); + struct pvrdma_cq *cq; + int ret; + int npages; + unsigned long flags; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_cq *cmd = &req.create_cq; + struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; + struct pvrdma_create_cq ucmd; + + BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + + entries = roundup_pow_of_two(entries); + if (entries < 1 || entries > dev->dsr->caps.max_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq)) + return ERR_PTR(-ENOMEM); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + atomic_dec(&dev->num_cqs); + return ERR_PTR(-ENOMEM); + } + + cq->ibcq.cqe = entries; + + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_cq; + } + + cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(cq->umem)) { + ret = PTR_ERR(cq->umem); + goto err_cq; + } + + npages = ib_umem_page_count(cq->umem); + } else { + cq->is_kernel = true; + + /* One extra page for shared ring state */ + npages = 1 + (entries * sizeof(struct pvrdma_cqe) + + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Skip header page. */ + cq->offset = PAGE_SIZE; + } + + if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in completion queue\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + /* Ring state is always the first page. Set in library for user cq. */ + if (cq->is_kernel) + cq->ring_state = cq->pdir.pages[0]; + else + pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); + + atomic_set(&cq->refcnt, 1); + init_waitqueue_head(&cq->wait); + spin_lock_init(&cq->cq_lock); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ; + cmd->nchunks = npages; + cmd->ctx_handle = (context) ? + (u64)to_vucontext(context)->ctx_handle : 0; + cmd->cqe = entries; + cmd->pdir_dma = cq->pdir.dir_dma; + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create completion queue, error: %d\n", ret); + goto err_page_dir; + } + + cq->ibcq.cqe = resp->cqe; + cq->cq_handle = resp->cq_handle; + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (context) { + cq->uar = &(to_vucontext(context)->uar); + + /* Copy udata back. */ + if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + pvrdma_destroy_cq(&cq->ibcq); + return ERR_PTR(-EINVAL); + } + } + + return &cq->ibcq; + +err_page_dir: + pvrdma_page_dir_cleanup(dev, &cq->pdir); +err_umem: + if (context) + ib_umem_release(cq->umem); +err_cq: + atomic_dec(&dev->num_cqs); + kfree(cq); + + return ERR_PTR(ret); +} + +static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) +{ + atomic_dec(&cq->refcnt); + wait_event(cq->wait, !atomic_read(&cq->refcnt)); + + if (!cq->is_kernel) + ib_umem_release(cq->umem); + + pvrdma_page_dir_cleanup(dev, &cq->pdir); + kfree(cq); +} + +/** + * pvrdma_destroy_cq - destroy completion queue + * @cq: the completion queue to destroy. + * + * @return: 0 for success. + */ +int pvrdma_destroy_cq(struct ib_cq *cq) +{ + struct pvrdma_cq *vcq = to_vcq(cq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq; + struct pvrdma_dev *dev = to_vdev(cq->device); + unsigned long flags; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ; + cmd->cq_handle = vcq->cq_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "could not destroy completion queue, error: %d\n", + ret); + + /* free cq's resources */ + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + dev->cq_tbl[vcq->cq_handle] = NULL; + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + pvrdma_free_cq(dev, vcq); + atomic_dec(&dev->num_cqs); + + return ret; +} + +/** + * pvrdma_modify_cq - modify the CQ moderation parameters + * @ibcq: the CQ to modify + * @cq_count: number of CQEs that will trigger an event + * @cq_period: max period of time in usec before triggering an event + * + * @return: -EOPNOTSUPP as CQ resize is not supported. + */ +int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + return -EOPNOTSUPP; +} + +static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) +{ + return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( + &cq->pdir, + cq->offset + + sizeof(struct pvrdma_cqe) * i); +} + +void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq) +{ + int head; + int has_data; + + if (!cq->is_kernel) + return; + + /* Lock held */ + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (unlikely(has_data > 0)) { + int items; + int curr; + int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail, + cq->ibcq.cqe); + struct pvrdma_cqe *cqe; + struct pvrdma_cqe *curr_cqe; + + items = (tail > head) ? (tail - head) : + (cq->ibcq.cqe - head + tail); + curr = --tail; + while (items-- > 0) { + if (curr < 0) + curr = cq->ibcq.cqe - 1; + if (tail < 0) + tail = cq->ibcq.cqe - 1; + curr_cqe = get_cqe(cq, curr); + if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) { + if (curr != tail) { + cqe = get_cqe(cq, tail); + *cqe = *curr_cqe; + } + tail--; + } else { + pvrdma_idx_ring_inc( + &cq->ring_state->rx.cons_head, + cq->ibcq.cqe); + } + curr--; + } + } +} + +static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp, + struct ib_wc *wc) +{ + struct pvrdma_dev *dev = to_vdev(cq->ibcq.device); + int has_data; + unsigned int head; + bool tried = false; + struct pvrdma_cqe *cqe; + +retry: + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (has_data == 0) { + if (tried) + return -EAGAIN; + + pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL); + + tried = true; + goto retry; + } else if (has_data == PVRDMA_INVALID_IDX) { + dev_err(&dev->pdev->dev, "CQ ring state invalid\n"); + return -EAGAIN; + } + + cqe = get_cqe(cq, head); + + /* Ensure cqe is valid. */ + rmb(); + if (dev->qp_tbl[cqe->qp & 0xffff]) + *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff]; + else + return -EAGAIN; + + wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode); + wc->status = pvrdma_wc_status_to_ib(cqe->status); + wc->wr_id = cqe->wr_id; + wc->qp = &(*cur_qp)->ibqp; + wc->byte_len = cqe->byte_len; + wc->ex.imm_data = cqe->imm_data; + wc->src_qp = cqe->src_qp; + wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags); + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->slid; + wc->sl = cqe->sl; + wc->dlid_path_bits = cqe->dlid_path_bits; + wc->port_num = cqe->port_num; + wc->vendor_err = 0; + + /* Update shared ring state */ + pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); + + return 0; +} + +/** + * pvrdma_poll_cq - poll for work completion queue entries + * @ibcq: completion queue + * @num_entries: the maximum number of entries + * @entry: pointer to work completion array + * + * @return: number of polled completion entries + */ +int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct pvrdma_cq *cq = to_vcq(ibcq); + struct pvrdma_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + + if (num_entries < 1 || wc == NULL) + return 0; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (npolled = 0; npolled < num_entries; ++npolled) { + if (pvrdma_poll_one(cq, &cur_qp, wc + npolled)) + break; + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + /* Ensure we do not return errors from poll_cq */ + return npolled; +} + +/** + * pvrdma_resize_cq - resize CQ + * @ibcq: the completion queue + * @entries: CQ entries + * @udata: user data + * + * @return: -EOPNOTSUPP as CQ resize is not supported. + */ +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h new file mode 100644 index 000000000000..c06768635d65 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_DEV_API_H__ +#define __PVRDMA_DEV_API_H__ + +#include <linux/types.h> + +#include "pvrdma_verbs.h" + +#define PVRDMA_VERSION 17 +#define PVRDMA_BOARD_ID 1 +#define PVRDMA_REV_ID 1 + +/* + * Masks and accessors for page directory, which is a two-level lookup: + * page directory -> page table -> page. Only one directory for now, but we + * could expand that easily. 9 bits for tables, 9 bits for pages, gives one + * gigabyte for memory regions and so forth. + */ + +#define PVRDMA_PDIR_SHIFT 18 +#define PVRDMA_PTABLE_SHIFT 9 +#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1) +#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff) +#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff) +#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512) +#define PVRDMA_MAX_FAST_REG_PAGES 128 + +/* + * Max MSI-X vectors. + */ + +#define PVRDMA_MAX_INTERRUPTS 3 + +/* Register offsets within PCI resource on BAR1. */ +#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */ +#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */ +#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */ +#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */ +#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */ +#define PVRDMA_REG_ERR 0x14 /* R: Device error. */ +#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */ +#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */ +#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */ +#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */ + +/* Object flags. */ +#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */ +#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */ +#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */ +#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */ + +/* + * Atomic operation capability (masked versions are extended atomic + * operations. + */ + +#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */ +#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */ +#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */ +#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */ + +/* + * Base Memory Management Extension flags to support Fast Reg Memory Regions + * and Fast Reg Work Requests. Each flag represents a verb operation and we + * must support all of them to qualify for the BMME device cap. + */ + +#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */ +#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */ +#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */ + +/* + * GID types. The interpretation of the gid_types bit field in the device + * capabilities will depend on the device mode. For now, the device only + * supports RoCE as mode, so only the different GID types for RoCE are + * defined. + */ + +#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0) +#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1) + +enum pvrdma_pci_resource { + PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ + PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */ + PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */ + PVRDMA_PCI_RESOURCE_LAST, /* Last. */ +}; + +enum pvrdma_device_ctl { + PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ + PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */ + PVRDMA_DEVICE_CTL_RESET, /* Reset device. */ +}; + +enum pvrdma_intr_vector { + PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */ + PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */ + PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */ + /* Additional CQ notification vectors. */ +}; + +enum pvrdma_intr_cause { + PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE), + PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC), + PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ), +}; + +enum pvrdma_intr_type { + PVRDMA_INTR_TYPE_INTX, /* Legacy. */ + PVRDMA_INTR_TYPE_MSI, /* MSI. */ + PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */ +}; + +enum pvrdma_gos_bits { + PVRDMA_GOS_BITS_UNK, /* Unknown. */ + PVRDMA_GOS_BITS_32, /* 32-bit. */ + PVRDMA_GOS_BITS_64, /* 64-bit. */ +}; + +enum pvrdma_gos_type { + PVRDMA_GOS_TYPE_UNK, /* Unknown. */ + PVRDMA_GOS_TYPE_LINUX, /* Linux. */ +}; + +enum pvrdma_device_mode { + PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */ + PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */ + PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */ +}; + +struct pvrdma_gos_info { + u32 gos_bits:2; /* W: PVRDMA_GOS_BITS_ */ + u32 gos_type:4; /* W: PVRDMA_GOS_TYPE_ */ + u32 gos_ver:16; /* W: Guest OS version. */ + u32 gos_misc:10; /* W: Other. */ + u32 pad; /* Pad to 8-byte alignment. */ +}; + +struct pvrdma_device_caps { + u64 fw_ver; /* R: Query device. */ + __be64 node_guid; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u64 atomic_arg_sizes; /* EX verbs. */ + u32 ex_comp_mask; /* EX verbs. */ + u32 device_cap_flags2; /* EX verbs. */ + u32 max_fa_bit_boundary; /* EX verbs. */ + u32 log_max_atomic_inline_arg; /* EX verbs. */ + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u32 max_qp; + u32 max_qp_wr; + u32 device_cap_flags; + u32 max_sge; + u32 max_sge_rd; + u32 max_cq; + u32 max_cqe; + u32 max_mr; + u32 max_pd; + u32 max_qp_rd_atom; + u32 max_ee_rd_atom; + u32 max_res_rd_atom; + u32 max_qp_init_rd_atom; + u32 max_ee_init_rd_atom; + u32 max_ee; + u32 max_rdd; + u32 max_mw; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 max_mcast_grp; + u32 max_mcast_qp_attach; + u32 max_total_mcast_qp_attach; + u32 max_ah; + u32 max_fmr; + u32 max_map_per_fmr; + u32 max_srq; + u32 max_srq_wr; + u32 max_srq_sge; + u32 max_uar; + u32 gid_tbl_len; + u16 max_pkeys; + u8 local_ca_ack_delay; + u8 phys_port_cnt; + u8 mode; /* PVRDMA_DEVICE_MODE_ */ + u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */ + u8 bmme_flags; /* FRWR Mem Mgmt Extensions */ + u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */ + u8 reserved[4]; +}; + +struct pvrdma_ring_page_info { + u32 num_pages; /* Num pages incl. header. */ + u32 reserved; /* Reserved. */ + u64 pdir_dma; /* Page directory PA. */ +}; + +#pragma pack(push, 1) + +struct pvrdma_device_shared_region { + u32 driver_version; /* W: Driver version. */ + u32 pad; /* Pad to 8-byte align. */ + struct pvrdma_gos_info gos_info; /* W: Guest OS information. */ + u64 cmd_slot_dma; /* W: Command slot address. */ + u64 resp_slot_dma; /* W: Response slot address. */ + struct pvrdma_ring_page_info async_ring_pages; + /* W: Async ring page info. */ + struct pvrdma_ring_page_info cq_ring_pages; + /* W: CQ ring page info. */ + u32 uar_pfn; /* W: UAR pageframe. */ + u32 pad2; /* Pad to 8-byte align. */ + struct pvrdma_device_caps caps; /* R: Device capabilities. */ +}; + +#pragma pack(pop) + +/* Event types. Currently a 1:1 mapping with enum ib_event. */ +enum pvrdma_eqe_type { + PVRDMA_EVENT_CQ_ERR, + PVRDMA_EVENT_QP_FATAL, + PVRDMA_EVENT_QP_REQ_ERR, + PVRDMA_EVENT_QP_ACCESS_ERR, + PVRDMA_EVENT_COMM_EST, + PVRDMA_EVENT_SQ_DRAINED, + PVRDMA_EVENT_PATH_MIG, + PVRDMA_EVENT_PATH_MIG_ERR, + PVRDMA_EVENT_DEVICE_FATAL, + PVRDMA_EVENT_PORT_ACTIVE, + PVRDMA_EVENT_PORT_ERR, + PVRDMA_EVENT_LID_CHANGE, + PVRDMA_EVENT_PKEY_CHANGE, + PVRDMA_EVENT_SM_CHANGE, + PVRDMA_EVENT_SRQ_ERR, + PVRDMA_EVENT_SRQ_LIMIT_REACHED, + PVRDMA_EVENT_QP_LAST_WQE_REACHED, + PVRDMA_EVENT_CLIENT_REREGISTER, + PVRDMA_EVENT_GID_CHANGE, +}; + +/* Event queue element. */ +struct pvrdma_eqe { + u32 type; /* Event type. */ + u32 info; /* Handle, other. */ +}; + +/* CQ notification queue element. */ +struct pvrdma_cqne { + u32 info; /* Handle */ +}; + +enum { + PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST, + PVRDMA_CMD_QUERY_PKEY, + PVRDMA_CMD_CREATE_PD, + PVRDMA_CMD_DESTROY_PD, + PVRDMA_CMD_CREATE_MR, + PVRDMA_CMD_DESTROY_MR, + PVRDMA_CMD_CREATE_CQ, + PVRDMA_CMD_RESIZE_CQ, + PVRDMA_CMD_DESTROY_CQ, + PVRDMA_CMD_CREATE_QP, + PVRDMA_CMD_MODIFY_QP, + PVRDMA_CMD_QUERY_QP, + PVRDMA_CMD_DESTROY_QP, + PVRDMA_CMD_CREATE_UC, + PVRDMA_CMD_DESTROY_UC, + PVRDMA_CMD_CREATE_BIND, + PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_MAX, +}; + +enum { + PVRDMA_CMD_FIRST_RESP = (1 << 31), + PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP, + PVRDMA_CMD_QUERY_PKEY_RESP, + PVRDMA_CMD_CREATE_PD_RESP, + PVRDMA_CMD_DESTROY_PD_RESP_NOOP, + PVRDMA_CMD_CREATE_MR_RESP, + PVRDMA_CMD_DESTROY_MR_RESP_NOOP, + PVRDMA_CMD_CREATE_CQ_RESP, + PVRDMA_CMD_RESIZE_CQ_RESP, + PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, + PVRDMA_CMD_CREATE_QP_RESP, + PVRDMA_CMD_MODIFY_QP_RESP, + PVRDMA_CMD_QUERY_QP_RESP, + PVRDMA_CMD_DESTROY_QP_RESP, + PVRDMA_CMD_CREATE_UC_RESP, + PVRDMA_CMD_DESTROY_UC_RESP_NOOP, + PVRDMA_CMD_CREATE_BIND_RESP_NOOP, + PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_MAX_RESP, +}; + +struct pvrdma_cmd_hdr { + u64 response; /* Key for response lookup. */ + u32 cmd; /* PVRDMA_CMD_ */ + u32 reserved; /* Reserved. */ +}; + +struct pvrdma_cmd_resp_hdr { + u64 response; /* From cmd hdr. */ + u32 ack; /* PVRDMA_CMD_XXX_RESP */ + u8 err; /* Error. */ + u8 reserved[3]; /* Reserved. */ +}; + +struct pvrdma_cmd_query_port { + struct pvrdma_cmd_hdr hdr; + u8 port_num; + u8 reserved[7]; +}; + +struct pvrdma_cmd_query_port_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_port_attr attrs; +}; + +struct pvrdma_cmd_query_pkey { + struct pvrdma_cmd_hdr hdr; + u8 port_num; + u8 index; + u8 reserved[6]; +}; + +struct pvrdma_cmd_query_pkey_resp { + struct pvrdma_cmd_resp_hdr hdr; + u16 pkey; + u8 reserved[6]; +}; + +struct pvrdma_cmd_create_uc { + struct pvrdma_cmd_hdr hdr; + u32 pfn; /* UAR page frame number */ + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_uc_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_uc { + struct pvrdma_cmd_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_pd { + struct pvrdma_cmd_hdr hdr; + u32 ctx_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_pd_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 pd_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_pd { + struct pvrdma_cmd_hdr hdr; + u32 pd_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_mr { + struct pvrdma_cmd_hdr hdr; + u64 start; + u64 length; + u64 pdir_dma; + u32 pd_handle; + u32 access_flags; + u32 flags; + u32 nchunks; +}; + +struct pvrdma_cmd_create_mr_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 mr_handle; + u32 lkey; + u32 rkey; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_mr { + struct pvrdma_cmd_hdr hdr; + u32 mr_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_cq { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 ctx_handle; + u32 cqe; + u32 nchunks; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 cq_handle; + u32 cqe; +}; + +struct pvrdma_cmd_resize_cq { + struct pvrdma_cmd_hdr hdr; + u32 cq_handle; + u32 cqe; +}; + +struct pvrdma_cmd_resize_cq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 cqe; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_cq { + struct pvrdma_cmd_hdr hdr; + u32 cq_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_qp { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 pd_handle; + u32 send_cq_handle; + u32 recv_cq_handle; + u32 srq_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; + u32 lkey; + u32 access_flags; + u16 total_chunks; + u16 send_chunks; + u16 max_atomic_arg; + u8 sq_sig_all; + u8 qp_type; + u8 is_srq; + u8 reserved[3]; +}; + +struct pvrdma_cmd_create_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +struct pvrdma_cmd_modify_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u32 attr_mask; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_query_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u32 attr_mask; +}; + +struct pvrdma_cmd_query_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_qp_attr attrs; +}; + +struct pvrdma_cmd_destroy_qp { + struct pvrdma_cmd_hdr hdr; + u32 qp_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_destroy_qp_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 events_reported; + u8 reserved[4]; +}; + +struct pvrdma_cmd_create_bind { + struct pvrdma_cmd_hdr hdr; + u32 mtu; + u32 vlan; + u32 index; + u8 new_gid[16]; + u8 gid_type; + u8 reserved[3]; +}; + +struct pvrdma_cmd_destroy_bind { + struct pvrdma_cmd_hdr hdr; + u32 index; + u8 dest_gid[16]; + u8 reserved[4]; +}; + +union pvrdma_cmd_req { + struct pvrdma_cmd_hdr hdr; + struct pvrdma_cmd_query_port query_port; + struct pvrdma_cmd_query_pkey query_pkey; + struct pvrdma_cmd_create_uc create_uc; + struct pvrdma_cmd_destroy_uc destroy_uc; + struct pvrdma_cmd_create_pd create_pd; + struct pvrdma_cmd_destroy_pd destroy_pd; + struct pvrdma_cmd_create_mr create_mr; + struct pvrdma_cmd_destroy_mr destroy_mr; + struct pvrdma_cmd_create_cq create_cq; + struct pvrdma_cmd_resize_cq resize_cq; + struct pvrdma_cmd_destroy_cq destroy_cq; + struct pvrdma_cmd_create_qp create_qp; + struct pvrdma_cmd_modify_qp modify_qp; + struct pvrdma_cmd_query_qp query_qp; + struct pvrdma_cmd_destroy_qp destroy_qp; + struct pvrdma_cmd_create_bind create_bind; + struct pvrdma_cmd_destroy_bind destroy_bind; +}; + +union pvrdma_cmd_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_cmd_query_port_resp query_port_resp; + struct pvrdma_cmd_query_pkey_resp query_pkey_resp; + struct pvrdma_cmd_create_uc_resp create_uc_resp; + struct pvrdma_cmd_create_pd_resp create_pd_resp; + struct pvrdma_cmd_create_mr_resp create_mr_resp; + struct pvrdma_cmd_create_cq_resp create_cq_resp; + struct pvrdma_cmd_resize_cq_resp resize_cq_resp; + struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_query_qp_resp query_qp_resp; + struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; +}; + +#endif /* __PVRDMA_DEV_API_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c new file mode 100644 index 000000000000..bf51357ea3aa --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/bitmap.h> +#include <linux/errno.h> +#include <linux/slab.h> + +#include "pvrdma.h" + +int pvrdma_uar_table_init(struct pvrdma_dev *dev) +{ + u32 num = dev->dsr->caps.max_uar; + u32 mask = num - 1; + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + + if (!is_power_of_2(num)) + return -EINVAL; + + tbl->last = 0; + tbl->top = 0; + tbl->max = num; + tbl->mask = mask; + spin_lock_init(&tbl->lock); + tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL); + if (!tbl->table) + return -ENOMEM; + + /* 0th UAR is taken by the device. */ + set_bit(0, tbl->table); + + return 0; +} + +void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev) +{ + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + + kfree(tbl->table); +} + +int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) +{ + struct pvrdma_id_table *tbl; + unsigned long flags; + u32 obj; + + tbl = &dev->uar_table.tbl; + + spin_lock_irqsave(&tbl->lock, flags); + obj = find_next_zero_bit(tbl->table, tbl->max, tbl->last); + if (obj >= tbl->max) { + tbl->top = (tbl->top + tbl->max) & tbl->mask; + obj = find_first_zero_bit(tbl->table, tbl->max); + } + + if (obj >= tbl->max) { + spin_unlock_irqrestore(&tbl->lock, flags); + return -ENOMEM; + } + + set_bit(obj, tbl->table); + obj |= tbl->top; + + spin_unlock_irqrestore(&tbl->lock, flags); + + uar->index = obj; + uar->pfn = (pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> + PAGE_SHIFT) + uar->index; + + return 0; +} + +void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) +{ + struct pvrdma_id_table *tbl = &dev->uar_table.tbl; + unsigned long flags; + u32 obj; + + obj = uar->index & (tbl->max - 1); + spin_lock_irqsave(&tbl->lock, flags); + clear_bit(obj, tbl->table); + tbl->last = min(tbl->last, obj); + tbl->top = (tbl->top + tbl->max) & tbl->mask; + spin_unlock_irqrestore(&tbl->lock, flags); +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c new file mode 100644 index 000000000000..231a1ce1f4be --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -0,0 +1,1211 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/errno.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include <net/addrconf.h> + +#include "pvrdma.h" + +#define DRV_NAME "vmw_pvrdma" +#define DRV_VERSION "1.0.0.0-k" + +static DEFINE_MUTEX(pvrdma_device_list_lock); +static LIST_HEAD(pvrdma_device_list); +static struct workqueue_struct *event_wq; + +static int pvrdma_add_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); +static int pvrdma_del_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + void **context); + + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", PVRDMA_REV_ID); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *pvrdma_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_board_id +}; + +static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct pvrdma_dev *dev = + container_of(device, struct pvrdma_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d\n", + (int) (dev->dsr->caps.fw_ver >> 32), + (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, + (int) dev->dsr->caps.fw_ver & 0xffff); +} + +static int pvrdma_init_device(struct pvrdma_dev *dev) +{ + /* Initialize some device related stuff */ + spin_lock_init(&dev->cmd_lock); + sema_init(&dev->cmd_sema, 1); + atomic_set(&dev->num_qps, 0); + atomic_set(&dev->num_cqs, 0); + atomic_set(&dev->num_pds, 0); + atomic_set(&dev->num_ahs, 0); + + return 0; +} + +static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = pvrdma_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + return 0; +} + +static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, + u8 port_num) +{ + struct net_device *netdev; + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (port_num != 1) + return NULL; + + rcu_read_lock(); + netdev = dev->netdev; + if (netdev) + dev_hold(netdev); + rcu_read_unlock(); + + return netdev; +} + +static int pvrdma_register_device(struct pvrdma_dev *dev) +{ + int ret = -1; + int i = 0; + + strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.node_guid = dev->dsr->caps.node_guid; + dev->sys_image_guid = dev->dsr->caps.sys_image_guid; + dev->flags = 0; + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.dma_device = &dev->pdev->dev; + dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH); + + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; + + dev->ib_dev.query_device = pvrdma_query_device; + dev->ib_dev.query_port = pvrdma_query_port; + dev->ib_dev.query_gid = pvrdma_query_gid; + dev->ib_dev.query_pkey = pvrdma_query_pkey; + dev->ib_dev.modify_port = pvrdma_modify_port; + dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; + dev->ib_dev.mmap = pvrdma_mmap; + dev->ib_dev.alloc_pd = pvrdma_alloc_pd; + dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; + dev->ib_dev.create_ah = pvrdma_create_ah; + dev->ib_dev.destroy_ah = pvrdma_destroy_ah; + dev->ib_dev.create_qp = pvrdma_create_qp; + dev->ib_dev.modify_qp = pvrdma_modify_qp; + dev->ib_dev.query_qp = pvrdma_query_qp; + dev->ib_dev.destroy_qp = pvrdma_destroy_qp; + dev->ib_dev.post_send = pvrdma_post_send; + dev->ib_dev.post_recv = pvrdma_post_recv; + dev->ib_dev.create_cq = pvrdma_create_cq; + dev->ib_dev.modify_cq = pvrdma_modify_cq; + dev->ib_dev.resize_cq = pvrdma_resize_cq; + dev->ib_dev.destroy_cq = pvrdma_destroy_cq; + dev->ib_dev.poll_cq = pvrdma_poll_cq; + dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; + dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; + dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; + dev->ib_dev.dereg_mr = pvrdma_dereg_mr; + dev->ib_dev.alloc_mr = pvrdma_alloc_mr; + dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; + dev->ib_dev.add_gid = pvrdma_add_gid; + dev->ib_dev.del_gid = pvrdma_del_gid; + dev->ib_dev.get_netdev = pvrdma_get_netdev; + dev->ib_dev.get_port_immutable = pvrdma_port_immutable; + dev->ib_dev.get_link_layer = pvrdma_port_link_layer; + dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + + mutex_init(&dev->port_mutex); + spin_lock_init(&dev->desc_lock); + + dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *), + GFP_KERNEL); + if (!dev->cq_tbl) + return ret; + spin_lock_init(&dev->cq_tbl_lock); + + dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *), + GFP_KERNEL); + if (!dev->qp_tbl) + goto err_cq_free; + spin_lock_init(&dev->qp_tbl_lock); + + ret = ib_register_device(&dev->ib_dev, NULL); + if (ret) + goto err_qp_free; + + for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { + ret = device_create_file(&dev->ib_dev.dev, + pvrdma_class_attributes[i]); + if (ret) + goto err_class; + } + + dev->ib_active = true; + + return 0; + +err_class: + ib_unregister_device(&dev->ib_dev); +err_qp_free: + kfree(dev->qp_tbl); +err_cq_free: + kfree(dev->cq_tbl); + + return ret; +} + +static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) +{ + u32 icr = PVRDMA_INTR_CAUSE_RESPONSE; + struct pvrdma_dev *dev = dev_id; + + dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); + + if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) { + /* Legacy intr */ + icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); + if (icr == 0) + return IRQ_NONE; + } + + if (icr == PVRDMA_INTR_CAUSE_RESPONSE) + complete(&dev->cmd_done); + + return IRQ_HANDLED; +} + +static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) +{ + struct pvrdma_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; + if (qp) + atomic_inc(&qp->refcnt); + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + if (qp && qp->ibqp.event_handler) { + struct ib_qp *ibqp = &qp->ibqp; + struct ib_event e; + + e.device = ibqp->device; + e.element.qp = ibqp; + e.event = type; /* 1:1 mapping for now. */ + ibqp->event_handler(&e, ibqp->qp_context); + } + if (qp) { + atomic_dec(&qp->refcnt); + if (atomic_read(&qp->refcnt) == 0) + wake_up(&qp->wait); + } +} + +static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) +{ + struct pvrdma_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; + if (cq) + atomic_inc(&cq->refcnt); + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (cq && cq->ibcq.event_handler) { + struct ib_cq *ibcq = &cq->ibcq; + struct ib_event e; + + e.device = ibcq->device; + e.element.cq = ibcq; + e.event = type; /* 1:1 mapping for now. */ + ibcq->event_handler(&e, ibcq->cq_context); + } + if (cq) { + atomic_dec(&cq->refcnt); + if (atomic_read(&cq->refcnt) == 0) + wake_up(&cq->wait); + } +} + +static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, + enum ib_event_type event) +{ + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + ib_event.device = &dev->ib_dev; + ib_event.element.port_num = port; + ib_event.event = event; + ib_dispatch_event(&ib_event); +} + +static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) +{ + if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { + dev_warn(&dev->pdev->dev, "event on port %d\n", port); + return; + } + + pvrdma_dispatch_event(dev, port, type); +} + +static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i) +{ + return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr( + &dev->async_pdir, + PAGE_SIZE + + sizeof(struct pvrdma_eqe) * i); +} + +static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) +{ + struct pvrdma_dev *dev = dev_id; + struct pvrdma_ring *ring = &dev->async_ring_state->rx; + int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) * + PAGE_SIZE / sizeof(struct pvrdma_eqe); + unsigned int head; + + dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n"); + + /* + * Don't process events until the IB device is registered. Otherwise + * we'll try to ib_dispatch_event() on an invalid device. + */ + if (!dev->ib_active) + return IRQ_HANDLED; + + while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { + struct pvrdma_eqe *eqe; + + eqe = get_eqe(dev, head); + + switch (eqe->type) { + case PVRDMA_EVENT_QP_FATAL: + case PVRDMA_EVENT_QP_REQ_ERR: + case PVRDMA_EVENT_QP_ACCESS_ERR: + case PVRDMA_EVENT_COMM_EST: + case PVRDMA_EVENT_SQ_DRAINED: + case PVRDMA_EVENT_PATH_MIG: + case PVRDMA_EVENT_PATH_MIG_ERR: + case PVRDMA_EVENT_QP_LAST_WQE_REACHED: + pvrdma_qp_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_CQ_ERR: + pvrdma_cq_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_SRQ_ERR: + case PVRDMA_EVENT_SRQ_LIMIT_REACHED: + break; + + case PVRDMA_EVENT_PORT_ACTIVE: + case PVRDMA_EVENT_PORT_ERR: + case PVRDMA_EVENT_LID_CHANGE: + case PVRDMA_EVENT_PKEY_CHANGE: + case PVRDMA_EVENT_SM_CHANGE: + case PVRDMA_EVENT_CLIENT_REREGISTER: + case PVRDMA_EVENT_GID_CHANGE: + pvrdma_dev_event(dev, eqe->info, eqe->type); + break; + + case PVRDMA_EVENT_DEVICE_FATAL: + pvrdma_dev_event(dev, 1, eqe->type); + break; + + default: + break; + } + + pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); + } + + return IRQ_HANDLED; +} + +static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev, + unsigned int i) +{ + return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr( + &dev->cq_pdir, + PAGE_SIZE + + sizeof(struct pvrdma_cqne) * i); +} + +static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) +{ + struct pvrdma_dev *dev = dev_id; + struct pvrdma_ring *ring = &dev->cq_ring_state->rx; + int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / + sizeof(struct pvrdma_cqne); + unsigned int head; + unsigned long flags; + + dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); + + while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { + struct pvrdma_cqne *cqne; + struct pvrdma_cq *cq; + + cqne = get_cqne(dev, head); + spin_lock_irqsave(&dev->cq_tbl_lock, flags); + cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; + if (cq) + atomic_inc(&cq->refcnt); + spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + + if (cq && cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + if (cq) { + atomic_dec(&cq->refcnt); + if (atomic_read(&cq->refcnt)) + wake_up(&cq->wait); + } + pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); + } + + return IRQ_HANDLED; +} + +static void pvrdma_disable_msi_all(struct pvrdma_dev *dev) +{ + if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) + pci_disable_msix(dev->pdev); + else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI) + pci_disable_msi(dev->pdev); +} + +static void pvrdma_free_irq(struct pvrdma_dev *dev) +{ + int i; + + dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); + + if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) { + for (i = 0; i < dev->intr.size; i++) { + if (dev->intr.enabled[i]) { + free_irq(dev->intr.msix_entry[i].vector, dev); + dev->intr.enabled[i] = 0; + } + } + } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX || + dev->intr.type == PVRDMA_INTR_TYPE_MSI) { + free_irq(dev->pdev->irq, dev); + } +} + +static void pvrdma_enable_intrs(struct pvrdma_dev *dev) +{ + dev_dbg(&dev->pdev->dev, "enable interrupts\n"); + pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0); +} + +static void pvrdma_disable_intrs(struct pvrdma_dev *dev) +{ + dev_dbg(&dev->pdev->dev, "disable interrupts\n"); + pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); +} + +static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev) +{ + int i; + int ret; + + for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) { + dev->intr.msix_entry[i].entry = i; + dev->intr.msix_entry[i].vector = i; + + switch (i) { + case 0: + /* CMD ring handler */ + dev->intr.handler[i] = pvrdma_intr0_handler; + break; + case 1: + /* Async event ring handler */ + dev->intr.handler[i] = pvrdma_intr1_handler; + break; + default: + /* Completion queue handler */ + dev->intr.handler[i] = pvrdma_intrx_handler; + break; + } + } + + ret = pci_enable_msix(pdev, dev->intr.msix_entry, + PVRDMA_MAX_INTERRUPTS); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSIX; + dev->intr.size = PVRDMA_MAX_INTERRUPTS; + } else if (ret > 0) { + ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSIX; + dev->intr.size = ret; + } else { + dev->intr.size = 0; + } + } + + dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n", + dev->intr.type, dev->intr.size); + + return ret; +} + +static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) +{ + int ret = 0; + int i; + + if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) && + pvrdma_enable_msix(dev->pdev, dev)) { + /* Try MSI */ + ret = pci_enable_msi(dev->pdev); + if (!ret) { + dev->intr.type = PVRDMA_INTR_TYPE_MSI; + } else { + /* Legacy INTR */ + dev->intr.type = PVRDMA_INTR_TYPE_INTX; + } + } + + /* Request First IRQ */ + switch (dev->intr.type) { + case PVRDMA_INTR_TYPE_INTX: + case PVRDMA_INTR_TYPE_MSI: + ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler, + IRQF_SHARED, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt\n"); + goto disable_msi; + } + break; + case PVRDMA_INTR_TYPE_MSIX: + ret = request_irq(dev->intr.msix_entry[0].vector, + pvrdma_intr0_handler, 0, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt 0\n"); + goto disable_msi; + } + dev->intr.enabled[0] = 1; + break; + default: + /* Not reached */ + break; + } + + /* For MSIX: request intr for each vector */ + if (dev->intr.size > 1) { + ret = request_irq(dev->intr.msix_entry[1].vector, + pvrdma_intr1_handler, 0, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt 1\n"); + goto free_irq; + } + dev->intr.enabled[1] = 1; + + for (i = 2; i < dev->intr.size; i++) { + ret = request_irq(dev->intr.msix_entry[i].vector, + pvrdma_intrx_handler, 0, + DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt %d\n", i); + goto free_irq; + } + dev->intr.enabled[i] = 1; + } + } + + return 0; + +free_irq: + pvrdma_free_irq(dev); +disable_msi: + pvrdma_disable_msi_all(dev); + return ret; +} + +static void pvrdma_free_slots(struct pvrdma_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + if (dev->resp_slot) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot, + dev->dsr->resp_slot_dma); + if (dev->cmd_slot) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot, + dev->dsr->cmd_slot_dma); +} + +static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, + const union ib_gid *gid, + int index) +{ + int ret; + union pvrdma_cmd_req req; + struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind; + + if (!dev->sgid_tbl) { + dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); + return -EINVAL; + } + + memset(cmd_bind, 0, sizeof(*cmd_bind)); + cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND; + memcpy(cmd_bind->new_gid, gid->raw, 16); + cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); + cmd_bind->vlan = 0xfff; + cmd_bind->index = index; + cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create binding, error: %d\n", ret); + return -EFAULT; + } + memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid)); + return 0; +} + +static int pvrdma_add_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + return pvrdma_add_gid_at_index(dev, gid, index); +} + +static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) +{ + int ret; + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind; + + /* Update sgid table. */ + if (!dev->sgid_tbl) { + dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); + return -EINVAL; + } + + memset(cmd_dest, 0, sizeof(*cmd_dest)); + cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND; + memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16); + cmd_dest->index = index; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not destroy binding, error: %d\n", ret); + return ret; + } + memset(&dev->sgid_tbl[index], 0, 16); + return 0; +} + +static int pvrdma_del_gid(struct ib_device *ibdev, + u8 port_num, + unsigned int index, + void **context) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", + index, dev->netdev->name); + + return pvrdma_del_gid_at_index(dev, index); +} + +static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + unsigned long event) +{ + switch (event) { + case NETDEV_REBOOT: + case NETDEV_DOWN: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); + break; + case NETDEV_UP: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; + default: + dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", + event, dev->ib_dev.name); + break; + } +} + +static void pvrdma_netdevice_event_work(struct work_struct *work) +{ + struct pvrdma_netdevice_work *netdev_work; + struct pvrdma_dev *dev; + + netdev_work = container_of(work, struct pvrdma_netdevice_work, work); + + mutex_lock(&pvrdma_device_list_lock); + list_for_each_entry(dev, &pvrdma_device_list, device_link) { + if (dev->netdev == netdev_work->event_netdev) { + pvrdma_netdevice_event_handle(dev, netdev_work->event); + break; + } + } + mutex_unlock(&pvrdma_device_list_lock); + + kfree(netdev_work); +} + +static int pvrdma_netdevice_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr); + struct pvrdma_netdevice_work *netdev_work; + + netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC); + if (!netdev_work) + return NOTIFY_BAD; + + INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work); + netdev_work->event_netdev = event_netdev; + netdev_work->event = event; + queue_work(event_wq, &netdev_work->work); + + return NOTIFY_DONE; +} + +static int pvrdma_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pci_dev *pdev_net; + struct pvrdma_dev *dev; + int ret; + unsigned long start; + unsigned long len; + unsigned int version; + dma_addr_t slot_dma = 0; + + dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); + + /* Allocate zero-out device */ + dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) { + dev_err(&pdev->dev, "failed to allocate IB device\n"); + return -ENOMEM; + } + + mutex_lock(&pvrdma_device_list_lock); + list_add(&dev->device_link, &pvrdma_device_list); + mutex_unlock(&pvrdma_device_list_lock); + + ret = pvrdma_init_device(dev); + if (ret) + goto err_free_device; + + dev->pdev = pdev; + pci_set_drvdata(pdev, dev); + + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "cannot enable PCI device\n"); + goto err_free_device; + } + + dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n", + pci_resource_flags(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource len %#llx\n", + (unsigned long long)pci_resource_len(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource start %#llx\n", + (unsigned long long)pci_resource_start(pdev, 0)); + dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n", + pci_resource_flags(pdev, 1)); + dev_dbg(&pdev->dev, "PCI resource len %#llx\n", + (unsigned long long)pci_resource_len(pdev, 1)); + dev_dbg(&pdev->dev, "PCI resource start %#llx\n", + (unsigned long long)pci_resource_start(pdev, 1)); + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || + !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); + ret = -ENOMEM; + goto err_free_device; + } + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "cannot request PCI resources\n"); + goto err_disable_pdev; + } + + /* Enable 64-Bit DMA */ + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret != 0) { + dev_err(&pdev->dev, + "pci_set_consistent_dma_mask failed\n"); + goto err_free_resource; + } + } else { + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret != 0) { + dev_err(&pdev->dev, + "pci_set_dma_mask failed\n"); + goto err_free_resource; + } + } + + pci_set_master(pdev); + + /* Map register space */ + start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG); + len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG); + dev->regs = ioremap(start, len); + if (!dev->regs) { + dev_err(&pdev->dev, "register mapping failed\n"); + ret = -ENOMEM; + goto err_free_resource; + } + + /* Setup per-device UAR. */ + dev->driver_uar.index = 0; + dev->driver_uar.pfn = + pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> + PAGE_SHIFT; + dev->driver_uar.map = + ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!dev->driver_uar.map) { + dev_err(&pdev->dev, "failed to remap UAR pages\n"); + ret = -ENOMEM; + goto err_unmap_regs; + } + + version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); + dev_info(&pdev->dev, "device version %d, driver version %d\n", + version, PVRDMA_VERSION); + if (version < PVRDMA_VERSION) { + dev_err(&pdev->dev, "incompatible device version\n"); + goto err_uar_unmap; + } + + dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), + &dev->dsrbase, GFP_KERNEL); + if (!dev->dsr) { + dev_err(&pdev->dev, "failed to allocate shared region\n"); + ret = -ENOMEM; + goto err_uar_unmap; + } + + /* Setup the shared region */ + memset(dev->dsr, 0, sizeof(*dev->dsr)); + dev->dsr->driver_version = PVRDMA_VERSION; + dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? + PVRDMA_GOS_BITS_32 : + PVRDMA_GOS_BITS_64; + dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; + dev->dsr->gos_info.gos_ver = 1; + dev->dsr->uar_pfn = dev->driver_uar.pfn; + + /* Command slot. */ + dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, + &slot_dma, GFP_KERNEL); + if (!dev->cmd_slot) { + ret = -ENOMEM; + goto err_free_dsr; + } + + dev->dsr->cmd_slot_dma = (u64)slot_dma; + + /* Response slot. */ + dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, + &slot_dma, GFP_KERNEL); + if (!dev->resp_slot) { + ret = -ENOMEM; + goto err_free_slots; + } + + dev->dsr->resp_slot_dma = (u64)slot_dma; + + /* Async event ring */ + dev->dsr->async_ring_pages.num_pages = 4; + ret = pvrdma_page_dir_init(dev, &dev->async_pdir, + dev->dsr->async_ring_pages.num_pages, true); + if (ret) + goto err_free_slots; + dev->async_ring_state = dev->async_pdir.pages[0]; + dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; + + /* CQ notification ring */ + dev->dsr->cq_ring_pages.num_pages = 4; + ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, + dev->dsr->cq_ring_pages.num_pages, true); + if (ret) + goto err_free_async_ring; + dev->cq_ring_state = dev->cq_pdir.pages[0]; + dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma; + + /* + * Write the PA of the shared region to the device. The writes must be + * ordered such that the high bits are written last. When the writes + * complete, the device will have filled out the capabilities. + */ + + pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase); + pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH, + (u32)((u64)(dev->dsrbase) >> 32)); + + /* Make sure the write is complete before reading status. */ + mb(); + + /* Currently, the driver only supports RoCE mode. */ + if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) { + dev_err(&pdev->dev, "unsupported transport %d\n", + dev->dsr->caps.mode); + ret = -EFAULT; + goto err_free_cq_ring; + } + + /* Currently, the driver only supports RoCE V1. */ + if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) { + dev_err(&pdev->dev, "driver needs RoCE v1 support\n"); + ret = -EFAULT; + goto err_free_cq_ring; + } + + /* Paired vmxnet3 will have same bus, slot. But func will be 0 */ + pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + if (!pdev_net) { + dev_err(&pdev->dev, "failed to find paired net device\n"); + ret = -ENODEV; + goto err_free_cq_ring; + } + + if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE || + pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) { + dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n"); + pci_dev_put(pdev_net); + ret = -ENODEV; + goto err_free_cq_ring; + } + + dev->netdev = pci_get_drvdata(pdev_net); + pci_dev_put(pdev_net); + if (!dev->netdev) { + dev_err(&pdev->dev, "failed to get vmxnet3 device\n"); + ret = -ENODEV; + goto err_free_cq_ring; + } + + dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); + + /* Interrupt setup */ + ret = pvrdma_alloc_intrs(dev); + if (ret) { + dev_err(&pdev->dev, "failed to allocate interrupts\n"); + ret = -ENOMEM; + goto err_netdevice; + } + + /* Allocate UAR table. */ + ret = pvrdma_uar_table_init(dev); + if (ret) { + dev_err(&pdev->dev, "failed to allocate UAR table\n"); + ret = -ENOMEM; + goto err_free_intrs; + } + + /* Allocate GID table */ + dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len, + sizeof(union ib_gid), GFP_KERNEL); + if (!dev->sgid_tbl) { + ret = -ENOMEM; + goto err_free_uar_table; + } + dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len); + + pvrdma_enable_intrs(dev); + + /* Activate pvrdma device */ + pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE); + + /* Make sure the write is complete before reading status. */ + mb(); + + /* Check if device was successfully activated */ + ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR); + if (ret != 0) { + dev_err(&pdev->dev, "failed to activate device\n"); + ret = -EFAULT; + goto err_disable_intr; + } + + /* Register IB device */ + ret = pvrdma_register_device(dev); + if (ret) { + dev_err(&pdev->dev, "failed to register IB device\n"); + goto err_disable_intr; + } + + dev->nb_netdev.notifier_call = pvrdma_netdevice_event; + ret = register_netdevice_notifier(&dev->nb_netdev); + if (ret) { + dev_err(&pdev->dev, "failed to register netdevice events\n"); + goto err_unreg_ibdev; + } + + dev_info(&pdev->dev, "attached to device\n"); + return 0; + +err_unreg_ibdev: + ib_unregister_device(&dev->ib_dev); +err_disable_intr: + pvrdma_disable_intrs(dev); + kfree(dev->sgid_tbl); +err_free_uar_table: + pvrdma_uar_table_cleanup(dev); +err_free_intrs: + pvrdma_free_irq(dev); + pvrdma_disable_msi_all(dev); +err_netdevice: + unregister_netdevice_notifier(&dev->nb_netdev); +err_free_cq_ring: + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); +err_free_async_ring: + pvrdma_page_dir_cleanup(dev, &dev->async_pdir); +err_free_slots: + pvrdma_free_slots(dev); +err_free_dsr: + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); +err_uar_unmap: + iounmap(dev->driver_uar.map); +err_unmap_regs: + iounmap(dev->regs); +err_free_resource: + pci_release_regions(pdev); +err_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +err_free_device: + mutex_lock(&pvrdma_device_list_lock); + list_del(&dev->device_link); + mutex_unlock(&pvrdma_device_list_lock); + ib_dealloc_device(&dev->ib_dev); + return ret; +} + +static void pvrdma_pci_remove(struct pci_dev *pdev) +{ + struct pvrdma_dev *dev = pci_get_drvdata(pdev); + + if (!dev) + return; + + dev_info(&pdev->dev, "detaching from device\n"); + + unregister_netdevice_notifier(&dev->nb_netdev); + dev->nb_netdev.notifier_call = NULL; + + flush_workqueue(event_wq); + + /* Unregister ib device */ + ib_unregister_device(&dev->ib_dev); + + mutex_lock(&pvrdma_device_list_lock); + list_del(&dev->device_link); + mutex_unlock(&pvrdma_device_list_lock); + + pvrdma_disable_intrs(dev); + pvrdma_free_irq(dev); + pvrdma_disable_msi_all(dev); + + /* Deactivate pvrdma device */ + pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); + pvrdma_page_dir_cleanup(dev, &dev->async_pdir); + pvrdma_free_slots(dev); + + iounmap(dev->regs); + kfree(dev->sgid_tbl); + kfree(dev->cq_tbl); + kfree(dev->qp_tbl); + pvrdma_uar_table_cleanup(dev); + iounmap(dev->driver_uar.map); + + ib_dealloc_device(&dev->ib_dev); + + /* Free pci resources */ + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_device_id pvrdma_pci_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, pvrdma_pci_table); + +static struct pci_driver pvrdma_driver = { + .name = DRV_NAME, + .id_table = pvrdma_pci_table, + .probe = pvrdma_pci_probe, + .remove = pvrdma_pci_remove, +}; + +static int __init pvrdma_init(void) +{ + int err; + + event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM); + if (!event_wq) + return -ENOMEM; + + err = pci_register_driver(&pvrdma_driver); + if (err) + destroy_workqueue(event_wq); + + return err; +} + +static void __exit pvrdma_cleanup(void) +{ + pci_unregister_driver(&pvrdma_driver); + + destroy_workqueue(event_wq); +} + +module_init(pvrdma_init); +module_exit(pvrdma_cleanup); + +MODULE_AUTHOR("VMware, Inc"); +MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c new file mode 100644 index 000000000000..948b5ccd2a70 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/bitmap.h> + +#include "pvrdma.h" + +int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir, + u64 npages, bool alloc_pages) +{ + u64 i; + + if (npages > PVRDMA_PAGE_DIR_MAX_PAGES) + return -EINVAL; + + memset(pdir, 0, sizeof(*pdir)); + + pdir->dir = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &pdir->dir_dma, GFP_KERNEL); + if (!pdir->dir) + goto err; + + pdir->ntables = PVRDMA_PAGE_DIR_TABLE(npages - 1) + 1; + pdir->tables = kcalloc(pdir->ntables, sizeof(*pdir->tables), + GFP_KERNEL); + if (!pdir->tables) + goto err; + + for (i = 0; i < pdir->ntables; i++) { + pdir->tables[i] = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + (dma_addr_t *)&pdir->dir[i], + GFP_KERNEL); + if (!pdir->tables[i]) + goto err; + } + + pdir->npages = npages; + + if (alloc_pages) { + pdir->pages = kcalloc(npages, sizeof(*pdir->pages), + GFP_KERNEL); + if (!pdir->pages) + goto err; + + for (i = 0; i < pdir->npages; i++) { + dma_addr_t page_dma; + + pdir->pages[i] = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, + &page_dma, + GFP_KERNEL); + if (!pdir->pages[i]) + goto err; + + pvrdma_page_dir_insert_dma(pdir, i, page_dma); + } + } + + return 0; + +err: + pvrdma_page_dir_cleanup(dev, pdir); + + return -ENOMEM; +} + +static u64 *pvrdma_page_dir_table(struct pvrdma_page_dir *pdir, u64 idx) +{ + return pdir->tables[PVRDMA_PAGE_DIR_TABLE(idx)]; +} + +dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx) +{ + return pvrdma_page_dir_table(pdir, idx)[PVRDMA_PAGE_DIR_PAGE(idx)]; +} + +static void pvrdma_page_dir_cleanup_pages(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->pages) { + u64 i; + + for (i = 0; i < pdir->npages && pdir->pages[i]; i++) { + dma_addr_t page_dma = pvrdma_page_dir_get_dma(pdir, i); + + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + pdir->pages[i], page_dma); + } + + kfree(pdir->pages); + } +} + +static void pvrdma_page_dir_cleanup_tables(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->tables) { + int i; + + pvrdma_page_dir_cleanup_pages(dev, pdir); + + for (i = 0; i < pdir->ntables; i++) { + u64 *table = pdir->tables[i]; + + if (table) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + table, pdir->dir[i]); + } + + kfree(pdir->tables); + } +} + +void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev, + struct pvrdma_page_dir *pdir) +{ + if (pdir->dir) { + pvrdma_page_dir_cleanup_tables(dev, pdir); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + pdir->dir, pdir->dir_dma); + } +} + +int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, + dma_addr_t daddr) +{ + u64 *table; + + if (idx >= pdir->npages) + return -EINVAL; + + table = pvrdma_page_dir_table(pdir, idx); + table[PVRDMA_PAGE_DIR_PAGE(idx)] = daddr; + + return 0; +} + +int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, + struct ib_umem *umem, u64 offset) +{ + u64 i = offset; + int j, entry; + int ret = 0, len = 0; + struct scatterlist *sg; + + if (offset >= pdir->npages) + return -EINVAL; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (j = 0; j < len; j++) { + dma_addr_t addr = sg_dma_address(sg) + + umem->page_size * j; + + ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + if (ret) + goto exit; + + i++; + } + } + +exit: + return ret; +} + +int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir, + u64 *page_list, + int num_pages) +{ + int i; + int ret; + + if (num_pages > pdir->npages) + return -EINVAL; + + for (i = 0; i < num_pages; i++) { + ret = pvrdma_page_dir_insert_dma(pdir, i, page_list[i]); + if (ret) + return ret; + } + + return 0; +} + +void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src) +{ + dst->max_send_wr = src->max_send_wr; + dst->max_recv_wr = src->max_recv_wr; + dst->max_send_sge = src->max_send_sge; + dst->max_recv_sge = src->max_recv_sge; + dst->max_inline_data = src->max_inline_data; +} + +void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, const struct ib_qp_cap *src) +{ + dst->max_send_wr = src->max_send_wr; + dst->max_recv_wr = src->max_recv_wr; + dst->max_send_sge = src->max_send_sge; + dst->max_recv_sge = src->max_recv_sge; + dst->max_inline_data = src->max_inline_data; +} + +void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src) +{ + BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid)); + memcpy(dst, src, sizeof(*src)); +} + +void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src) +{ + BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid)); + memcpy(dst, src, sizeof(*src)); +} + +void pvrdma_global_route_to_ib(struct ib_global_route *dst, + const struct pvrdma_global_route *src) +{ + pvrdma_gid_to_ib(&dst->dgid, &src->dgid); + dst->flow_label = src->flow_label; + dst->sgid_index = src->sgid_index; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; +} + +void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst, + const struct ib_global_route *src) +{ + ib_gid_to_pvrdma(&dst->dgid, &src->dgid); + dst->flow_label = src->flow_label; + dst->sgid_index = src->sgid_index; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; +} + +void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst, + const struct pvrdma_ah_attr *src) +{ + pvrdma_global_route_to_ib(&dst->grh, &src->grh); + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->ah_flags = src->ah_flags; + dst->port_num = src->port_num; + memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac)); +} + +void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, + const struct ib_ah_attr *src) +{ + ib_global_route_to_pvrdma(&dst->grh, &src->grh); + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->ah_flags = src->ah_flags; + dst->port_num = src->port_num; + memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac)); +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c new file mode 100644 index 000000000000..8519f3212e52 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> +#include <linux/slab.h> + +#include "pvrdma.h" + +/** + * pvrdma_get_dma_mr - get a DMA memory region + * @pd: protection domain + * @acc: access flags + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int ret; + + /* Support only LOCAL_WRITE flag for DMA MRs */ + if (acc & ~IB_ACCESS_LOCAL_WRITE) { + dev_warn(&dev->pdev->dev, + "unsupported dma mr access flags %#x\n", acc); + return ERR_PTR(-EOPNOTSUPP); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = acc; + cmd->flags = PVRDMA_MR_FLAG_DMA; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not get DMA mem region, error: %d\n", ret); + kfree(mr); + return ERR_PTR(ret); + } + + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + + return &mr->ibmr; +} + +/** + * pvrdma_reg_user_mr - register a userspace memory region + * @pd: protection domain + * @start: starting address + * @length: length of region + * @virt_addr: I/O virtual address + * @access_flags: access flags for memory region + * @udata: user data + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr = NULL; + struct ib_umem *umem; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int nchunks; + int ret; + int entry; + struct scatterlist *sg; + + if (length == 0 || length > dev->dsr->caps.max_mr_size) { + dev_warn(&dev->pdev->dev, "invalid mem region length\n"); + return ERR_PTR(-EINVAL); + } + + umem = ib_umem_get(pd->uobject->context, start, + length, access_flags, 0); + if (IS_ERR(umem)) { + dev_warn(&dev->pdev->dev, + "could not get umem for mem region\n"); + return ERR_CAST(umem); + } + + nchunks = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) + nchunks += sg_dma_len(sg) >> PAGE_SHIFT; + + if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", + nchunks); + ret = -EINVAL; + goto err_umem; + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = -ENOMEM; + goto err_umem; + } + + mr->mmr.iova = virt_addr; + mr->mmr.size = length; + mr->umem = umem; + + ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + ret = pvrdma_page_dir_insert_umem(&mr->pdir, mr->umem, 0); + if (ret) + goto err_pdir; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->start = start; + cmd->length = length; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = access_flags; + cmd->nchunks = nchunks; + cmd->pdir_dma = mr->pdir.dir_dma; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not register mem region, error: %d\n", ret); + goto err_pdir; + } + + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + + return &mr->ibmr; + +err_pdir: + pvrdma_page_dir_cleanup(dev, &mr->pdir); +err_umem: + ib_umem_release(umem); + kfree(mr); + + return ERR_PTR(ret); +} + +/** + * pvrdma_alloc_mr - allocate a memory region + * @pd: protection domain + * @mr_type: type of memory region + * @max_num_sg: maximum number of pages + * + * @return: ib_mr pointer on success, otherwise returns an errno. + */ +struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_user_mr *mr; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_mr *cmd = &req.create_mr; + struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; + int size = max_num_sg * sizeof(u64); + int ret; + + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > PVRDMA_MAX_FAST_REG_PAGES) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->pages = kzalloc(size, GFP_KERNEL); + if (!mr->pages) { + ret = -ENOMEM; + goto freemr; + } + + ret = pvrdma_page_dir_init(dev, &mr->pdir, max_num_sg, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "failed to allocate page dir for mr\n"); + ret = -ENOMEM; + goto freepages; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->access_flags = 0; + cmd->flags = PVRDMA_MR_FLAG_FRMR; + cmd->nchunks = max_num_sg; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create FR mem region, error: %d\n", ret); + goto freepdir; + } + + mr->max_pages = max_num_sg; + mr->mmr.mr_handle = resp->mr_handle; + mr->ibmr.lkey = resp->lkey; + mr->ibmr.rkey = resp->rkey; + mr->page_shift = PAGE_SHIFT; + mr->umem = NULL; + + return &mr->ibmr; + +freepdir: + pvrdma_page_dir_cleanup(dev, &mr->pdir); +freepages: + kfree(mr->pages); +freemr: + kfree(mr); + return ERR_PTR(ret); +} + +/** + * pvrdma_dereg_mr - deregister a memory region + * @ibmr: memory region + * + * @return: 0 on success. + */ +int pvrdma_dereg_mr(struct ib_mr *ibmr) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + struct pvrdma_dev *dev = to_vdev(ibmr->device); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_mr *cmd = &req.destroy_mr; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_MR; + cmd->mr_handle = mr->mmr.mr_handle; + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "could not deregister mem region, error: %d\n", ret); + + pvrdma_page_dir_cleanup(dev, &mr->pdir); + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr->pages); + kfree(mr); + + return 0; +} + +static int pvrdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + + if (mr->npages == mr->max_pages) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + return 0; +} + +int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct pvrdma_user_mr *mr = to_vmr(ibmr); + struct pvrdma_dev *dev = to_vdev(ibmr->device); + int ret; + + mr->npages = 0; + + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, pvrdma_set_page); + if (ret < 0) + dev_warn(&dev->pdev->dev, "could not map sg to pages\n"); + + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c new file mode 100644 index 000000000000..c8c01e558125 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "pvrdma.h" + +static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, + struct pvrdma_cq **recv_cq) +{ + *send_cq = to_vcq(qp->ibqp.send_cq); + *recv_cq = to_vcq(qp->ibqp.recv_cq); +} + +static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq, + unsigned long *scq_flags, + unsigned long *rcq_flags) + __acquires(scq->cq_lock) __acquires(rcq->cq_lock) +{ + if (scq == rcq) { + spin_lock_irqsave(&scq->cq_lock, *scq_flags); + __acquire(rcq->cq_lock); + } else if (scq->cq_handle < rcq->cq_handle) { + spin_lock_irqsave(&scq->cq_lock, *scq_flags); + spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags, + SINGLE_DEPTH_NESTING); + } else { + spin_lock_irqsave(&rcq->cq_lock, *rcq_flags); + spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags, + SINGLE_DEPTH_NESTING); + } +} + +static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq, + unsigned long *scq_flags, + unsigned long *rcq_flags) + __releases(scq->cq_lock) __releases(rcq->cq_lock) +{ + if (scq == rcq) { + __release(rcq->cq_lock); + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + } else if (scq->cq_handle < rcq->cq_handle) { + spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags); + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + } else { + spin_unlock_irqrestore(&scq->cq_lock, *scq_flags); + spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags); + } +} + +static void pvrdma_reset_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq, *rcq; + unsigned long scq_flags, rcq_flags; + + /* Clean up cqes */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + /* + * Reset queuepair. The checks are because usermode queuepairs won't + * have kernel ringstates. + */ + if (qp->rq.ring) { + atomic_set(&qp->rq.ring->cons_head, 0); + atomic_set(&qp->rq.ring->prod_tail, 0); + } + if (qp->sq.ring) { + atomic_set(&qp->sq.ring->cons_head, 0); + atomic_set(&qp->sq.ring->prod_tail, 0); + } +} + +static int pvrdma_set_rq_size(struct pvrdma_dev *dev, + struct ib_qp_cap *req_cap, + struct pvrdma_qp *qp) +{ + if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr || + req_cap->max_recv_sge > dev->dsr->caps.max_sge) { + dev_warn(&dev->pdev->dev, "recv queue size invalid\n"); + return -EINVAL; + } + + qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr)); + qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge)); + + /* Write back */ + req_cap->max_recv_wr = qp->rq.wqe_cnt; + req_cap->max_recv_sge = qp->rq.max_sg; + + qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * + qp->rq.max_sg); + qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) / + PAGE_SIZE; + + return 0; +} + +static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap, + enum ib_qp_type type, struct pvrdma_qp *qp) +{ + if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr || + req_cap->max_send_sge > dev->dsr->caps.max_sge) { + dev_warn(&dev->pdev->dev, "send queue size invalid\n"); + return -EINVAL; + } + + qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr)); + qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge)); + + /* Write back */ + req_cap->max_send_wr = qp->sq.wqe_cnt; + req_cap->max_send_sge = qp->sq.max_sg; + + qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) + + sizeof(struct pvrdma_sge) * + qp->sq.max_sg); + /* Note: one extra page for the header. */ + qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size + + PAGE_SIZE - 1) / PAGE_SIZE; + + return 0; +} + +/** + * pvrdma_create_qp - create queue pair + * @pd: protection domain + * @init_attr: queue pair attributes + * @udata: user data + * + * @return: the ib_qp pointer on success, otherwise returns an errno. + */ +struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct pvrdma_qp *qp = NULL; + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_qp *cmd = &req.create_qp; + struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_create_qp ucmd; + unsigned long flags; + int ret; + + if (init_attr->create_flags) { + dev_warn(&dev->pdev->dev, + "invalid create queuepair flags %#x\n", + init_attr->create_flags); + return ERR_PTR(-EINVAL); + } + + if (init_attr->qp_type != IB_QPT_RC && + init_attr->qp_type != IB_QPT_UD && + init_attr->qp_type != IB_QPT_GSI) { + dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n", + init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp)) + return ERR_PTR(-ENOMEM); + + switch (init_attr->qp_type) { + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > pd->device->phys_port_cnt || + udata) { + dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n"); + ret = -EINVAL; + goto err_qp; + } + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UD: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + ret = -ENOMEM; + goto err_qp; + } + + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + mutex_init(&qp->mutex); + atomic_set(&qp->refcnt, 1); + init_waitqueue_head(&qp->wait); + + qp->state = IB_QPS_RESET; + + if (pd->uobject && udata) { + dev_dbg(&dev->pdev->dev, + "create queuepair from user space\n"); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_qp; + } + + /* set qp->sq.wqe_cnt, shift, buf_size.. */ + qp->rumem = ib_umem_get(pd->uobject->context, + ucmd.rbuf_addr, + ucmd.rbuf_size, 0, 0); + if (IS_ERR(qp->rumem)) { + ret = PTR_ERR(qp->rumem); + goto err_qp; + } + + qp->sumem = ib_umem_get(pd->uobject->context, + ucmd.sbuf_addr, + ucmd.sbuf_size, 0, 0); + if (IS_ERR(qp->sumem)) { + ib_umem_release(qp->rumem); + ret = PTR_ERR(qp->sumem); + goto err_qp; + } + + qp->npages_send = ib_umem_page_count(qp->sumem); + qp->npages_recv = ib_umem_page_count(qp->rumem); + qp->npages = qp->npages_send + qp->npages_recv; + } else { + qp->is_kernel = true; + + ret = pvrdma_set_sq_size(to_vdev(pd->device), + &init_attr->cap, + init_attr->qp_type, qp); + if (ret) + goto err_qp; + + ret = pvrdma_set_rq_size(to_vdev(pd->device), + &init_attr->cap, qp); + if (ret) + goto err_qp; + + qp->npages = qp->npages_send + qp->npages_recv; + + /* Skip header page. */ + qp->sq.offset = PAGE_SIZE; + + /* Recv queue pages are after send pages. */ + qp->rq.offset = qp->npages_send * PAGE_SIZE; + } + + if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in queuepair\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages, + qp->is_kernel); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + if (!qp->is_kernel) { + pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0); + pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem, + qp->npages_send); + } else { + /* Ring state is always the first page. */ + qp->sq.ring = qp->pdir.pages[0]; + qp->rq.ring = &qp->sq.ring[1]; + } + break; + default: + ret = -EINVAL; + goto err_qp; + } + + /* Not supported */ + init_attr->cap.max_inline_data = 0; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle; + cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle; + cmd->max_send_wr = init_attr->cap.max_send_wr; + cmd->max_recv_wr = init_attr->cap.max_recv_wr; + cmd->max_send_sge = init_attr->cap.max_send_sge; + cmd->max_recv_sge = init_attr->cap.max_recv_sge; + cmd->max_inline_data = init_attr->cap.max_inline_data; + cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; + cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); + cmd->access_flags = IB_ACCESS_LOCAL_WRITE; + cmd->total_chunks = qp->npages; + cmd->send_chunks = qp->npages_send - 1; + cmd->pdir_dma = qp->pdir.dir_dma; + + dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n", + cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge, + cmd->max_recv_sge); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create queuepair, error: %d\n", ret); + goto err_pdir; + } + + /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ + qp->qp_handle = resp->qpn; + qp->port = init_attr->port_num; + qp->ibqp.qp_num = resp->qpn; + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + return &qp->ibqp; + +err_pdir: + pvrdma_page_dir_cleanup(dev, &qp->pdir); +err_umem: + if (pd->uobject && udata) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } +err_qp: + kfree(qp); + atomic_dec(&dev->num_qps); + + return ERR_PTR(ret); +} + +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long flags, scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + spin_lock_irqsave(&dev->qp_tbl_lock, flags); + dev->qp_tbl[qp->qp_handle] = NULL; + spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + atomic_dec(&qp->refcnt); + wait_event(qp->wait, !atomic_read(&qp->refcnt)); + + pvrdma_page_dir_cleanup(dev, &qp->pdir); + + kfree(qp); + + atomic_dec(&dev->num_qps); +} + +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * + * @return: 0 on success. + */ +int pvrdma_destroy_qp(struct ib_qp *qp) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; + cmd->qp_handle = vqp->qp_handle; + + ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + if (ret < 0) + dev_warn(&to_vdev(qp->device)->pdev->dev, + "destroy queuepair failed, error: %d\n", ret); + + pvrdma_free_qp(vqp); + + return 0; +} + +/** + * pvrdma_modify_qp - modify queue pair attributes + * @ibqp: the queue pair + * @attr: the new queue pair's attributes + * @attr_mask: attributes mask + * @udata: user data + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + struct pvrdma_qp *qp = to_vqp(ibqp); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp; + int cur_state, next_state; + int ret; + + /* Sanity checking. Should need lock here */ + mutex_lock(&qp->mutex); + cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : + qp->state; + next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_ETHERNET)) { + ret = -EINVAL; + goto out; + } + + if (attr_mask & IB_QP_PORT) { + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + if (attr->min_rnr_timer > 31) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= dev->dsr->caps.max_pkeys) { + ret = -EINVAL; + goto out; + } + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (cur_state == next_state && cur_state == IB_QPS_RESET) { + ret = 0; + goto out; + } + + qp->state = next_state; + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP; + cmd->qp_handle = qp->qp_handle; + cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask); + cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state); + cmd->attrs.cur_qp_state = + ib_qp_state_to_pvrdma(attr->cur_qp_state); + cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu); + cmd->attrs.path_mig_state = + ib_mig_state_to_pvrdma(attr->path_mig_state); + cmd->attrs.qkey = attr->qkey; + cmd->attrs.rq_psn = attr->rq_psn; + cmd->attrs.sq_psn = attr->sq_psn; + cmd->attrs.dest_qp_num = attr->dest_qp_num; + cmd->attrs.qp_access_flags = + ib_access_flags_to_pvrdma(attr->qp_access_flags); + cmd->attrs.pkey_index = attr->pkey_index; + cmd->attrs.alt_pkey_index = attr->alt_pkey_index; + cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify; + cmd->attrs.sq_draining = attr->sq_draining; + cmd->attrs.max_rd_atomic = attr->max_rd_atomic; + cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic; + cmd->attrs.min_rnr_timer = attr->min_rnr_timer; + cmd->attrs.port_num = attr->port_num; + cmd->attrs.timeout = attr->timeout; + cmd->attrs.retry_cnt = attr->retry_cnt; + cmd->attrs.rnr_retry = attr->rnr_retry; + cmd->attrs.alt_port_num = attr->alt_port_num; + cmd->attrs.alt_timeout = attr->alt_timeout; + ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap); + ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr); + ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not modify queuepair, error: %d\n", ret); + } else if (rsp.hdr.err > 0) { + dev_warn(&dev->pdev->dev, + "cannot modify queuepair, error: %d\n", rsp.hdr.err); + ret = -EINVAL; + } + + if (ret == 0 && next_state == IB_QPS_RESET) + pvrdma_reset_qp(qp); + +out: + mutex_unlock(&qp->mutex); + + return ret; +} + +static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n) +{ + return pvrdma_page_dir_get_ptr(&qp->pdir, + qp->sq.offset + n * qp->sq.wqe_size); +} + +static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n) +{ + return pvrdma_page_dir_get_ptr(&qp->pdir, + qp->rq.offset + n * qp->rq.wqe_size); +} + +static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) +{ + struct pvrdma_user_mr *mr = to_vmr(wr->mr); + + wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova; + wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma; + wqe_hdr->wr.fast_reg.page_shift = mr->page_shift; + wqe_hdr->wr.fast_reg.page_list_len = mr->npages; + wqe_hdr->wr.fast_reg.length = mr->ibmr.length; + wqe_hdr->wr.fast_reg.access_flags = wr->access; + wqe_hdr->wr.fast_reg.rkey = wr->key; + + return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages, + mr->npages); +} + +/** + * pvrdma_post_send - post send work request entries on a QP + * @ibqp: the QP + * @wr: work request list to post + * @bad_wr: the first bad WR returned + * + * @return: 0 on success, otherwise errno returned. + */ +int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct pvrdma_qp *qp = to_vqp(ibqp); + struct pvrdma_dev *dev = to_vdev(ibqp->device); + unsigned long flags; + struct pvrdma_sq_wqe_hdr *wqe_hdr; + struct pvrdma_sge *sge; + int i, index; + int nreq; + int ret; + + /* + * In states lower than RTS, we can fail immediately. In other states, + * just post and let the device figure it out. + */ + if (qp->state < IB_QPS_RTS) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->sq.lock, flags); + + index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); + for (nreq = 0; wr; nreq++, wr = wr->next) { + unsigned int tail; + + if (unlikely(!pvrdma_idx_ring_has_space( + qp->sq.ring, qp->sq.wqe_cnt, &tail))) { + dev_warn_ratelimited(&dev->pdev->dev, + "send queue is full\n"); + *bad_wr = wr; + ret = -ENOMEM; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) { + dev_warn_ratelimited(&dev->pdev->dev, + "send SGE overflow\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + if (unlikely(wr->opcode < 0)) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid send opcode\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + /* + * Only support UD, RC. + * Need to check opcode table for thorough checking. + * opcode _UD _UC _RC + * _SEND x x x + * _SEND_WITH_IMM x x x + * _RDMA_WRITE x x + * _RDMA_WRITE_WITH_IMM x x + * _LOCAL_INV x x + * _SEND_WITH_INV x x + * _RDMA_READ x + * _ATOMIC_CMP_AND_SWP x + * _ATOMIC_FETCH_AND_ADD x + * _MASK_ATOMIC_CMP_AND_SWP x + * _MASK_ATOMIC_FETCH_AND_ADD x + * _REG_MR x + * + */ + if (qp->ibqp.qp_type != IB_QPT_UD && + qp->ibqp.qp_type != IB_QPT_RC && + wr->opcode != IB_WR_SEND) { + dev_warn_ratelimited(&dev->pdev->dev, + "unsupported queuepair type\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } else if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_GSI) { + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid send opcode\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + } + + wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index); + memset(wqe_hdr, 0, sizeof(*wqe_hdr)); + wqe_hdr->wr_id = wr->wr_id; + wqe_hdr->num_sge = wr->num_sge; + wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode); + wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags); + if (wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe_hdr->ex.imm_data = wr->ex.imm_data; + + switch (qp->ibqp.qp_type) { + case IB_QPT_GSI: + case IB_QPT_UD: + if (unlikely(!ud_wr(wr)->ah)) { + dev_warn_ratelimited(&dev->pdev->dev, + "invalid address handle\n"); + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + + /* + * Use qkey from qp context if high order bit set, + * otherwise from work request. + */ + wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn; + wqe_hdr->wr.ud.remote_qkey = + ud_wr(wr)->remote_qkey & 0x80000000 ? + qp->qkey : ud_wr(wr)->remote_qkey; + wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av; + + break; + case IB_QPT_RC: + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_hdr->wr.rdma.remote_addr = + rdma_wr(wr)->remote_addr; + wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey; + break; + case IB_WR_LOCAL_INV: + case IB_WR_SEND_WITH_INV: + wqe_hdr->ex.invalidate_rkey = + wr->ex.invalidate_rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + wqe_hdr->wr.atomic.remote_addr = + atomic_wr(wr)->remote_addr; + wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey; + wqe_hdr->wr.atomic.compare_add = + atomic_wr(wr)->compare_add; + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) + wqe_hdr->wr.atomic.swap = + atomic_wr(wr)->swap; + break; + case IB_WR_REG_MR: + ret = set_reg_seg(wqe_hdr, reg_wr(wr)); + if (ret < 0) { + dev_warn_ratelimited(&dev->pdev->dev, + "Failed to set fast register work request\n"); + *bad_wr = wr; + goto out; + } + break; + default: + break; + } + + break; + default: + dev_warn_ratelimited(&dev->pdev->dev, + "invalid queuepair type\n"); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + sge = (struct pvrdma_sge *)(wqe_hdr + 1); + for (i = 0; i < wr->num_sge; i++) { + /* Need to check wqe_size 0 or max size */ + sge->addr = wr->sg_list[i].addr; + sge->length = wr->sg_list[i].length; + sge->lkey = wr->sg_list[i].lkey; + sge++; + } + + /* Make sure wqe is written before index update */ + smp_wmb(); + + index++; + if (unlikely(index >= qp->sq.wqe_cnt)) + index = 0; + /* Update shared sq ring */ + pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail, + qp->sq.wqe_cnt); + } + + ret = 0; + +out: + spin_unlock_irqrestore(&qp->sq.lock, flags); + + if (!ret) + pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle); + + return ret; +} + +/** + * pvrdma_post_receive - post receive work request entries on a QP + * @ibqp: the QP + * @wr: the work request list to post + * @bad_wr: the first bad WR returned + * + * @return: 0 on success, otherwise errno returned. + */ +int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + unsigned long flags; + struct pvrdma_qp *qp = to_vqp(ibqp); + struct pvrdma_rq_wqe_hdr *wqe_hdr; + struct pvrdma_sge *sge; + int index, nreq; + int ret = 0; + int i; + + /* + * In the RESET state, we can fail immediately. For other states, + * just post and let the device figure it out. + */ + if (qp->state == IB_QPS_RESET) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->rq.lock, flags); + + index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); + for (nreq = 0; wr; nreq++, wr = wr->next) { + unsigned int tail; + + if (unlikely(wr->num_sge > qp->rq.max_sg || + wr->num_sge < 0)) { + ret = -EINVAL; + *bad_wr = wr; + dev_warn_ratelimited(&dev->pdev->dev, + "recv SGE overflow\n"); + goto out; + } + + if (unlikely(!pvrdma_idx_ring_has_space( + qp->rq.ring, qp->rq.wqe_cnt, &tail))) { + ret = -ENOMEM; + *bad_wr = wr; + dev_warn_ratelimited(&dev->pdev->dev, + "recv queue full\n"); + goto out; + } + + wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index); + wqe_hdr->wr_id = wr->wr_id; + wqe_hdr->num_sge = wr->num_sge; + wqe_hdr->total_len = 0; + + sge = (struct pvrdma_sge *)(wqe_hdr + 1); + for (i = 0; i < wr->num_sge; i++) { + sge->addr = wr->sg_list[i].addr; + sge->length = wr->sg_list[i].length; + sge->lkey = wr->sg_list[i].lkey; + sge++; + } + + /* Make sure wqe is written before index update */ + smp_wmb(); + + index++; + if (unlikely(index >= qp->rq.wqe_cnt)) + index = 0; + /* Update shared rq ring */ + pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail, + qp->rq.wqe_cnt); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle); + + return ret; + +out: + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return ret; +} + +/** + * pvrdma_query_qp - query a queue pair's attributes + * @ibqp: the queue pair to query + * @attr: the queue pair's attributes + * @attr_mask: attributes mask + * @init_attr: initial queue pair attributes + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct pvrdma_dev *dev = to_vdev(ibqp->device); + struct pvrdma_qp *qp = to_vqp(ibqp); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_qp *cmd = &req.query_qp; + struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp; + int ret = 0; + + mutex_lock(&qp->mutex); + + if (qp->state == IB_QPS_RESET) { + attr->qp_state = IB_QPS_RESET; + goto out; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP; + cmd->qp_handle = qp->qp_handle; + cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask); + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not query queuepair, error: %d\n", ret); + goto out; + } + + attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state); + attr->cur_qp_state = + pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state); + attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu); + attr->path_mig_state = + pvrdma_mig_state_to_ib(resp->attrs.path_mig_state); + attr->qkey = resp->attrs.qkey; + attr->rq_psn = resp->attrs.rq_psn; + attr->sq_psn = resp->attrs.sq_psn; + attr->dest_qp_num = resp->attrs.dest_qp_num; + attr->qp_access_flags = + pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags); + attr->pkey_index = resp->attrs.pkey_index; + attr->alt_pkey_index = resp->attrs.alt_pkey_index; + attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify; + attr->sq_draining = resp->attrs.sq_draining; + attr->max_rd_atomic = resp->attrs.max_rd_atomic; + attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic; + attr->min_rnr_timer = resp->attrs.min_rnr_timer; + attr->port_num = resp->attrs.port_num; + attr->timeout = resp->attrs.timeout; + attr->retry_cnt = resp->attrs.retry_cnt; + attr->rnr_retry = resp->attrs.rnr_retry; + attr->alt_port_num = resp->attrs.alt_port_num; + attr->alt_timeout = resp->attrs.alt_timeout; + pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap); + pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr); + pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr); + + qp->state = attr->qp_state; + + ret = 0; + +out: + attr->cur_qp_state = attr->qp_state; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->xrcd = NULL; + init_attr->cap = attr->cap; + init_attr->sq_sig_type = 0; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->create_flags = 0; + init_attr->port_num = qp->port; + + mutex_unlock(&qp->mutex); + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h new file mode 100644 index 000000000000..ed9022a91a1d --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_RING_H__ +#define __PVRDMA_RING_H__ + +#include <linux/types.h> + +#define PVRDMA_INVALID_IDX -1 /* Invalid index. */ + +struct pvrdma_ring { + atomic_t prod_tail; /* Producer tail. */ + atomic_t cons_head; /* Consumer head. */ +}; + +struct pvrdma_ring_state { + struct pvrdma_ring tx; /* Tx ring. */ + struct pvrdma_ring rx; /* Rx ring. */ +}; + +static inline int pvrdma_idx_valid(__u32 idx, __u32 max_elems) +{ + /* Generates fewer instructions than a less-than. */ + return (idx & ~((max_elems << 1) - 1)) == 0; +} + +static inline __s32 pvrdma_idx(atomic_t *var, __u32 max_elems) +{ + const unsigned int idx = atomic_read(var); + + if (pvrdma_idx_valid(idx, max_elems)) + return idx & (max_elems - 1); + return PVRDMA_INVALID_IDX; +} + +static inline void pvrdma_idx_ring_inc(atomic_t *var, __u32 max_elems) +{ + __u32 idx = atomic_read(var) + 1; /* Increment. */ + + idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */ + atomic_set(var, idx); +} + +static inline __s32 pvrdma_idx_ring_has_space(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *out_tail) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_tail = tail & (max_elems - 1); + return tail != (head ^ max_elems); + } + return PVRDMA_INVALID_IDX; +} + +static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *out_head) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems)) { + *out_head = head & (max_elems - 1); + return tail != head; + } + return PVRDMA_INVALID_IDX; +} + +static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r, + __u32 max_elems, __u32 *idx) +{ + const __u32 tail = atomic_read(&r->prod_tail); + const __u32 head = atomic_read(&r->cons_head); + + if (pvrdma_idx_valid(tail, max_elems) && + pvrdma_idx_valid(head, max_elems) && + pvrdma_idx_valid(*idx, max_elems)) { + if (tail > head && (*idx < tail && *idx >= head)) + return true; + else if (head > tail && (*idx >= head || *idx < tail)) + return true; + } + return false; +} + +#endif /* __PVRDMA_RING_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c new file mode 100644 index 000000000000..54891370d18a --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/page.h> +#include <linux/inet.h> +#include <linux/io.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/vmw_pvrdma-abi.h> + +#include "pvrdma.h" + +/** + * pvrdma_query_device - query device + * @ibdev: the device to query + * @props: the device properties + * @uhw: user data + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *uhw) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + memset(props, 0, sizeof(*props)); + + props->fw_ver = dev->dsr->caps.fw_ver; + props->sys_image_guid = dev->dsr->caps.sys_image_guid; + props->max_mr_size = dev->dsr->caps.max_mr_size; + props->page_size_cap = dev->dsr->caps.page_size_cap; + props->vendor_id = dev->dsr->caps.vendor_id; + props->vendor_part_id = dev->pdev->device; + props->hw_ver = dev->dsr->caps.hw_ver; + props->max_qp = dev->dsr->caps.max_qp; + props->max_qp_wr = dev->dsr->caps.max_qp_wr; + props->device_cap_flags = dev->dsr->caps.device_cap_flags; + props->max_sge = dev->dsr->caps.max_sge; + props->max_cq = dev->dsr->caps.max_cq; + props->max_cqe = dev->dsr->caps.max_cqe; + props->max_mr = dev->dsr->caps.max_mr; + props->max_pd = dev->dsr->caps.max_pd; + props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom; + props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom; + props->atomic_cap = + dev->dsr->caps.atomic_ops & + (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; + props->max_ah = dev->dsr->caps.max_ah; + props->max_pkeys = dev->dsr->caps.max_pkeys; + props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay; + if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) && + (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) && + (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) { + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + } + + return 0; +} + +/** + * pvrdma_query_port - query device port attributes + * @ibdev: the device to query + * @port: the port number + * @props: the device properties + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_port *cmd = &req.query_port; + struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp; + int err; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT; + cmd->port_num = port; + + err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP); + if (err < 0) { + dev_warn(&dev->pdev->dev, + "could not query port, error: %d\n", err); + return err; + } + + memset(props, 0, sizeof(*props)); + + props->state = pvrdma_port_state_to_ib(resp->attrs.state); + props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu); + props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu); + props->gid_tbl_len = resp->attrs.gid_tbl_len; + props->port_cap_flags = + pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); + props->max_msg_sz = resp->attrs.max_msg_sz; + props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; + props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; + props->pkey_tbl_len = resp->attrs.pkey_tbl_len; + props->lid = resp->attrs.lid; + props->sm_lid = resp->attrs.sm_lid; + props->lmc = resp->attrs.lmc; + props->max_vl_num = resp->attrs.max_vl_num; + props->sm_sl = resp->attrs.sm_sl; + props->subnet_timeout = resp->attrs.subnet_timeout; + props->init_type_reply = resp->attrs.init_type_reply; + props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width); + props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed); + props->phys_state = resp->attrs.phys_state; + + return 0; +} + +/** + * pvrdma_query_gid - query device gid + * @ibdev: the device to query + * @port: the port number + * @index: the index + * @gid: the device gid value + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct pvrdma_dev *dev = to_vdev(ibdev); + + if (index >= dev->dsr->caps.gid_tbl_len) + return -EINVAL; + + memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid)); + + return 0; +} + +/** + * pvrdma_query_pkey - query device port's P_Key table + * @ibdev: the device to query + * @port: the port number + * @index: the index + * @pkey: the device P_Key value + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + int err = 0; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY; + cmd->port_num = port; + cmd->index = index; + + err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp, + PVRDMA_CMD_QUERY_PKEY_RESP); + if (err < 0) { + dev_warn(&to_vdev(ibdev)->pdev->dev, + "could not query pkey, error: %d\n", err); + return err; + } + + *pkey = rsp.query_pkey_resp.pkey; + + return 0; +} + +enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, + u8 port) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int pvrdma_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + unsigned long flags; + + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) { + dev_warn(&to_vdev(ibdev)->pdev->dev, + "unsupported device modify mask %#x\n", mask); + return -EOPNOTSUPP; + } + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags); + } + + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { + mutex_lock(&to_vdev(ibdev)->port_mutex); + to_vdev(ibdev)->sys_image_guid = + cpu_to_be64(props->sys_image_guid); + mutex_unlock(&to_vdev(ibdev)->port_mutex); + } + + return 0; +} + +/** + * pvrdma_modify_port - modify device port attributes + * @ibdev: the device to modify + * @port: the port number + * @mask: attributes to modify + * @props: the device properties + * + * @return: 0 on success, otherwise negative errno + */ +int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct ib_port_attr attr; + struct pvrdma_dev *vdev = to_vdev(ibdev); + int ret; + + if (mask & ~IB_PORT_SHUTDOWN) { + dev_warn(&vdev->pdev->dev, + "unsupported port modify mask %#x\n", mask); + return -EOPNOTSUPP; + } + + mutex_lock(&vdev->port_mutex); + ret = pvrdma_query_port(ibdev, port, &attr); + if (ret) + goto out; + + vdev->port_cap_mask |= props->set_port_cap_mask; + vdev->port_cap_mask &= ~props->clr_port_cap_mask; + + if (mask & IB_PORT_SHUTDOWN) + vdev->ib_active = false; + +out: + mutex_unlock(&vdev->port_mutex); + return ret; +} + +/** + * pvrdma_alloc_ucontext - allocate ucontext + * @ibdev: the IB device + * @udata: user data + * + * @return: the ib_ucontext pointer on success, otherwise errno. + */ +struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct pvrdma_dev *vdev = to_vdev(ibdev); + struct pvrdma_ucontext *context; + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_uc *cmd = &req.create_uc; + struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp; + struct pvrdma_alloc_ucontext_resp uresp; + int ret; + void *ptr; + + if (!vdev->ib_active) + return ERR_PTR(-EAGAIN); + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + context->dev = vdev; + ret = pvrdma_uar_alloc(vdev, &context->uar); + if (ret) { + kfree(context); + return ERR_PTR(-ENOMEM); + } + + /* get ctx_handle from host */ + memset(cmd, 0, sizeof(*cmd)); + cmd->pfn = context->uar.pfn; + cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC; + ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP); + if (ret < 0) { + dev_warn(&vdev->pdev->dev, + "could not create ucontext, error: %d\n", ret); + ptr = ERR_PTR(ret); + goto err; + } + + context->ctx_handle = resp->ctx_handle; + + /* copy back to user */ + uresp.qp_tab_size = vdev->dsr->caps.max_qp; + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) { + pvrdma_uar_free(vdev, &context->uar); + context->ibucontext.device = ibdev; + pvrdma_dealloc_ucontext(&context->ibucontext); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; + +err: + pvrdma_uar_free(vdev, &context->uar); + kfree(context); + return ptr; +} + +/** + * pvrdma_dealloc_ucontext - deallocate ucontext + * @ibcontext: the ucontext + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct pvrdma_ucontext *context = to_vucontext(ibcontext); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC; + cmd->ctx_handle = context->ctx_handle; + + ret = pvrdma_cmd_post(context->dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&context->dev->pdev->dev, + "destroy ucontext failed, error: %d\n", ret); + + /* Free the UAR even if the device command failed */ + pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar); + kfree(context); + + return ret; +} + +/** + * pvrdma_mmap - create mmap region + * @ibcontext: the user context + * @vma: the VMA + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct pvrdma_ucontext *context = to_vucontext(ibcontext); + unsigned long start = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + + dev_dbg(&context->dev->pdev->dev, "create mmap region\n"); + + if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) { + dev_warn(&context->dev->pdev->dev, + "invalid params for mmap region\n"); + return -EINVAL; + } + + /* Map UAR to kernel space, VM_LOCKED? */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, start, context->uar.pfn, size, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * pvrdma_alloc_pd - allocate protection domain + * @ibdev: the IB device + * @context: user context + * @udata: user data + * + * @return: the ib_pd protection domain pointer on success, otherwise errno. + */ +struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct pvrdma_pd *pd; + struct pvrdma_dev *dev = to_vdev(ibdev); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_pd *cmd = &req.create_pd; + struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; + int ret; + void *ptr; + + /* Check allowed max pds */ + if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) + return ERR_PTR(-ENOMEM); + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ptr = ERR_PTR(-ENOMEM); + goto err; + } + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; + cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "failed to allocate protection domain, error: %d\n", + ret); + ptr = ERR_PTR(ret); + goto freepd; + } + + pd->privileged = !context; + pd->pd_handle = resp->pd_handle; + pd->pdn = resp->pd_handle; + + if (context) { + if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, + "failed to copy back protection domain\n"); + pvrdma_dealloc_pd(&pd->ibpd); + return ERR_PTR(-EFAULT); + } + } + + /* u32 pd handle */ + return &pd->ibpd; + +freepd: + kfree(pd); +err: + atomic_dec(&dev->num_pds); + return ptr; +} + +/** + * pvrdma_dealloc_pd - deallocate protection domain + * @pd: the protection domain to be released + * + * @return: 0 on success, otherwise errno. + */ +int pvrdma_dealloc_pd(struct ib_pd *pd) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD; + cmd->pd_handle = to_vpd(pd)->pd_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret) + dev_warn(&dev->pdev->dev, + "could not dealloc protection domain, error: %d\n", + ret); + + kfree(to_vpd(pd)); + atomic_dec(&dev->num_pds); + + return 0; +} + +/** + * pvrdma_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * @udata: user data blob + * + * @return: the ib_ah pointer on success, otherwise errno. + */ +struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) +{ + struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_ah *ah; + enum rdma_link_layer ll; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return ERR_PTR(-EINVAL); + + ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num); + + if (ll != IB_LINK_LAYER_ETHERNET || + rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) + return ERR_PTR(-ENOMEM); + + ah = kzalloc(sizeof(*ah), GFP_KERNEL); + if (!ah) { + atomic_dec(&dev->num_ahs); + return ERR_PTR(-ENOMEM); + } + + ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24); + ah->av.src_path_bits = ah_attr->src_path_bits; + ah->av.src_path_bits |= 0x80; + ah->av.gid_index = ah_attr->grh.sgid_index; + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label; + memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.dmac, ah_attr->dmac, 6); + + ah->ibah.device = pd->device; + ah->ibah.pd = pd; + ah->ibah.uobject = NULL; + + return &ah->ibah; +} + +/** + * pvrdma_destroy_ah - destroy an address handle + * @ah: the address handle to destroyed + * + * @return: 0 on success. + */ +int pvrdma_destroy_ah(struct ib_ah *ah) +{ + struct pvrdma_dev *dev = to_vdev(ah->device); + + kfree(to_vah(ah)); + atomic_dec(&dev->num_ahs); + + return 0; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h new file mode 100644 index 000000000000..bfbe96b56255 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PVRDMA_VERBS_H__ +#define __PVRDMA_VERBS_H__ + +#include <linux/types.h> + +union pvrdma_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum pvrdma_link_layer { + PVRDMA_LINK_LAYER_UNSPECIFIED, + PVRDMA_LINK_LAYER_INFINIBAND, + PVRDMA_LINK_LAYER_ETHERNET, +}; + +enum pvrdma_mtu { + PVRDMA_MTU_256 = 1, + PVRDMA_MTU_512 = 2, + PVRDMA_MTU_1024 = 3, + PVRDMA_MTU_2048 = 4, + PVRDMA_MTU_4096 = 5, +}; + +static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) +{ + switch (mtu) { + case PVRDMA_MTU_256: return 256; + case PVRDMA_MTU_512: return 512; + case PVRDMA_MTU_1024: return 1024; + case PVRDMA_MTU_2048: return 2048; + case PVRDMA_MTU_4096: return 4096; + default: return -1; + } +} + +static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) +{ + switch (mtu) { + case 256: return PVRDMA_MTU_256; + case 512: return PVRDMA_MTU_512; + case 1024: return PVRDMA_MTU_1024; + case 2048: return PVRDMA_MTU_2048; + case 4096: + default: return PVRDMA_MTU_4096; + } +} + +enum pvrdma_port_state { + PVRDMA_PORT_NOP = 0, + PVRDMA_PORT_DOWN = 1, + PVRDMA_PORT_INIT = 2, + PVRDMA_PORT_ARMED = 3, + PVRDMA_PORT_ACTIVE = 4, + PVRDMA_PORT_ACTIVE_DEFER = 5, +}; + +enum pvrdma_port_cap_flags { + PVRDMA_PORT_SM = 1 << 1, + PVRDMA_PORT_NOTICE_SUP = 1 << 2, + PVRDMA_PORT_TRAP_SUP = 1 << 3, + PVRDMA_PORT_OPT_IPD_SUP = 1 << 4, + PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5, + PVRDMA_PORT_SL_MAP_SUP = 1 << 6, + PVRDMA_PORT_MKEY_NVRAM = 1 << 7, + PVRDMA_PORT_PKEY_NVRAM = 1 << 8, + PVRDMA_PORT_LED_INFO_SUP = 1 << 9, + PVRDMA_PORT_SM_DISABLED = 1 << 10, + PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + PVRDMA_PORT_CM_SUP = 1 << 16, + PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17, + PVRDMA_PORT_REINIT_SUP = 1 << 18, + PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19, + PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20, + PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21, + PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23, + PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24, + PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25, + PVRDMA_PORT_IP_BASED_GIDS = 1 << 26, + PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS, +}; + +enum pvrdma_port_width { + PVRDMA_WIDTH_1X = 1, + PVRDMA_WIDTH_4X = 2, + PVRDMA_WIDTH_8X = 4, + PVRDMA_WIDTH_12X = 8, +}; + +static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) +{ + switch (width) { + case PVRDMA_WIDTH_1X: return 1; + case PVRDMA_WIDTH_4X: return 4; + case PVRDMA_WIDTH_8X: return 8; + case PVRDMA_WIDTH_12X: return 12; + default: return -1; + } +} + +enum pvrdma_port_speed { + PVRDMA_SPEED_SDR = 1, + PVRDMA_SPEED_DDR = 2, + PVRDMA_SPEED_QDR = 4, + PVRDMA_SPEED_FDR10 = 8, + PVRDMA_SPEED_FDR = 16, + PVRDMA_SPEED_EDR = 32, +}; + +struct pvrdma_port_attr { + enum pvrdma_port_state state; + enum pvrdma_mtu max_mtu; + enum pvrdma_mtu active_mtu; + u32 gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; + u8 reserved[2]; +}; + +struct pvrdma_global_route { + union pvrdma_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; + u8 reserved; +}; + +struct pvrdma_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union pvrdma_gid sgid; + union pvrdma_gid dgid; +}; + +enum pvrdma_ah_flags { + PVRDMA_AH_GRH = 1, +}; + +enum pvrdma_rate { + PVRDMA_RATE_PORT_CURRENT = 0, + PVRDMA_RATE_2_5_GBPS = 2, + PVRDMA_RATE_5_GBPS = 5, + PVRDMA_RATE_10_GBPS = 3, + PVRDMA_RATE_20_GBPS = 6, + PVRDMA_RATE_30_GBPS = 4, + PVRDMA_RATE_40_GBPS = 7, + PVRDMA_RATE_60_GBPS = 8, + PVRDMA_RATE_80_GBPS = 9, + PVRDMA_RATE_120_GBPS = 10, + PVRDMA_RATE_14_GBPS = 11, + PVRDMA_RATE_56_GBPS = 12, + PVRDMA_RATE_112_GBPS = 13, + PVRDMA_RATE_168_GBPS = 14, + PVRDMA_RATE_25_GBPS = 15, + PVRDMA_RATE_100_GBPS = 16, + PVRDMA_RATE_200_GBPS = 17, + PVRDMA_RATE_300_GBPS = 18, +}; + +struct pvrdma_ah_attr { + struct pvrdma_global_route grh; + u16 dlid; + u16 vlan_id; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; + u8 dmac[6]; + u8 reserved; +}; + +enum pvrdma_cq_notify_flags { + PVRDMA_CQ_SOLICITED = 1 << 0, + PVRDMA_CQ_NEXT_COMP = 1 << 1, + PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED | + PVRDMA_CQ_NEXT_COMP, + PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2, +}; + +struct pvrdma_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; + u32 reserved; +}; + +enum pvrdma_sig_type { + PVRDMA_SIGNAL_ALL_WR, + PVRDMA_SIGNAL_REQ_WR, +}; + +enum pvrdma_qp_type { + PVRDMA_QPT_SMI, + PVRDMA_QPT_GSI, + PVRDMA_QPT_RC, + PVRDMA_QPT_UC, + PVRDMA_QPT_UD, + PVRDMA_QPT_RAW_IPV6, + PVRDMA_QPT_RAW_ETHERTYPE, + PVRDMA_QPT_RAW_PACKET = 8, + PVRDMA_QPT_XRC_INI = 9, + PVRDMA_QPT_XRC_TGT, + PVRDMA_QPT_MAX, +}; + +enum pvrdma_qp_create_flags { + PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0, + PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, +}; + +enum pvrdma_qp_attr_mask { + PVRDMA_QP_STATE = 1 << 0, + PVRDMA_QP_CUR_STATE = 1 << 1, + PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, + PVRDMA_QP_ACCESS_FLAGS = 1 << 3, + PVRDMA_QP_PKEY_INDEX = 1 << 4, + PVRDMA_QP_PORT = 1 << 5, + PVRDMA_QP_QKEY = 1 << 6, + PVRDMA_QP_AV = 1 << 7, + PVRDMA_QP_PATH_MTU = 1 << 8, + PVRDMA_QP_TIMEOUT = 1 << 9, + PVRDMA_QP_RETRY_CNT = 1 << 10, + PVRDMA_QP_RNR_RETRY = 1 << 11, + PVRDMA_QP_RQ_PSN = 1 << 12, + PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13, + PVRDMA_QP_ALT_PATH = 1 << 14, + PVRDMA_QP_MIN_RNR_TIMER = 1 << 15, + PVRDMA_QP_SQ_PSN = 1 << 16, + PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17, + PVRDMA_QP_PATH_MIG_STATE = 1 << 18, + PVRDMA_QP_CAP = 1 << 19, + PVRDMA_QP_DEST_QPN = 1 << 20, + PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN, +}; + +enum pvrdma_qp_state { + PVRDMA_QPS_RESET, + PVRDMA_QPS_INIT, + PVRDMA_QPS_RTR, + PVRDMA_QPS_RTS, + PVRDMA_QPS_SQD, + PVRDMA_QPS_SQE, + PVRDMA_QPS_ERR, +}; + +enum pvrdma_mig_state { + PVRDMA_MIG_MIGRATED, + PVRDMA_MIG_REARM, + PVRDMA_MIG_ARMED, +}; + +enum pvrdma_mw_type { + PVRDMA_MW_TYPE_1 = 1, + PVRDMA_MW_TYPE_2 = 2, +}; + +struct pvrdma_qp_attr { + enum pvrdma_qp_state qp_state; + enum pvrdma_qp_state cur_qp_state; + enum pvrdma_mtu path_mtu; + enum pvrdma_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + u32 qp_access_flags; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; + u8 reserved[5]; + struct pvrdma_qp_cap cap; + struct pvrdma_ah_attr ah_attr; + struct pvrdma_ah_attr alt_ah_attr; +}; + +enum pvrdma_send_flags { + PVRDMA_SEND_FENCE = 1 << 0, + PVRDMA_SEND_SIGNALED = 1 << 1, + PVRDMA_SEND_SOLICITED = 1 << 2, + PVRDMA_SEND_INLINE = 1 << 3, + PVRDMA_SEND_IP_CSUM = 1 << 4, + PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM, +}; + +enum pvrdma_access_flags { + PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0, + PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1, + PVRDMA_ACCESS_REMOTE_READ = 1 << 2, + PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3, + PVRDMA_ACCESS_MW_BIND = 1 << 4, + PVRDMA_ZERO_BASED = 1 << 5, + PVRDMA_ACCESS_ON_DEMAND = 1 << 6, + PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND, +}; + +int pvrdma_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata); +int pvrdma_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int pvrdma_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid); +int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, + u16 index, u16 *pkey); +enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, + u8 port); +int pvrdma_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props); +int pvrdma_modify_port(struct ib_device *ibdev, u8 port, + int mask, struct ib_port_modify *props); +int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); +int pvrdma_dealloc_ucontext(struct ib_ucontext *context); +struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int pvrdma_dealloc_pd(struct ib_pd *ibpd); +struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int pvrdma_dereg_mr(struct ib_mr *mr); +struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); +int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); +int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, + struct ib_udata *udata); +struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); +int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, + struct ib_udata *udata); +int pvrdma_destroy_cq(struct ib_cq *cq); +int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah); +struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); +int pvrdma_destroy_qp(struct ib_qp *qp); +int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +#endif /* __PVRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..7aa7a4e312f1 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - smp_read_barrier_depends(); /* see rvt_cq_exit */ - worker = cq->rdi->worker; - if (likely(worker)) { + spin_lock(&cq->rdi->n_cqs_lock); + if (likely(cq->rdi->worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - kthread_queue_work(worker, &cq->comptask); + kthread_queue_work(cq->rdi->worker, &cq->comptask); } + spin_unlock(&cq->rdi->n_cqs_lock); } spin_unlock_irqrestore(&cq->lock, flags); @@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, } } - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); @@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_dev_info *rdi = cq->rdi; kthread_flush_work(&cq->comptask); - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else @@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { - int ret = 0; int cpu; - struct task_struct *task; + struct kthread_worker *worker; if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); - rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); - if (!rdi->worker) - return -ENOMEM; - kthread_init_worker(rdi->worker); - task = kthread_create_on_node( - kthread_worker_fn, - rdi->worker, - rdi->dparms.node, - "%s", rdi->dparms.cq_name); - if (IS_ERR(task)) { - kfree(rdi->worker); - rdi->worker = NULL; - return PTR_ERR(task); - } - set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - kthread_bind(task, cpu); - wake_up_process(task); - return ret; + worker = kthread_create_worker_on_cpu(cpu, 0, + "%s", rdi->dparms.cq_name); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + set_user_nice(worker->task, MIN_NICE); + rdi->worker = worker; + return 0; } /** @@ -541,13 +530,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; + /* block future queuing from send_complete() */ + spin_lock_irq(&rdi->n_cqs_lock); worker = rdi->worker; - if (!worker) + if (!worker) { + spin_unlock_irq(&rdi->n_cqs_lock); return; - /* blocks future queuing from send_complete() */ + } rdi->worker = NULL; - smp_wmb(); /* See rdi_cq_enter */ - kthread_flush_worker(worker); - kthread_stop(worker->task); - kfree(worker); + spin_unlock_irq(&rdi->n_cqs_lock); + + kthread_destroy_worker(worker); } diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 983d319ac976..05c8c2afb0e3 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) goto bail; mqp->qp = qp; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); bail: return mqp; @@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); kfree(mqp); } diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 46b64970058e..52fd15276ee6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -51,6 +51,7 @@ #include <rdma/rdma_vt.h> #include "vt.h" #include "mr.h" +#include "trace.h" /** * rvt_driver_mr_init - Init MR resources per driver @@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) lkey_table_size = rdi->dparms.lkey_table_size; } rdi->lkey_table.max = 1 << lkey_table_size; + rdi->lkey_table.shift = 32 - lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); @@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size); n++; if (n == RVT_SEGSZ) { m++; @@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; return 0; @@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; fmr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); if (++n == RVT_SEGSZ) { m++; n = 0; @@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_mregion *mr; unsigned n, m; size_t off; - struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); /* * We use LKEY == zero for kernel virtual addresses @@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, */ rcu_read_lock(); if (sge->lkey == 0) { + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + if (pd->user) goto bail; mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); isge->mr = mr; @@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; @@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; @@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference(rdi->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); sge->mr = mr; @@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, goto ok; } - mr = rcu_dereference( - rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; @@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6500c3b5a89c..2a13ac660f2b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_rvt_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; +EXPORT_SYMBOL(ib_rvt_wc_opcode); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, return ret; bail_ip: - kref_put(&qp->ip->ref, rvt_release_mmap_info); + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 6c0457db5499..e2d23acb6a7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -45,143 +45,10 @@ * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR rdmavt - -#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __RDMAVT_TRACE_H - -#include <linux/tracepoint.h> -#include <linux/trace_seq.h> - -#include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> - #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) #define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rdmavt - -TRACE_EVENT(rvt_dbg, - TP_PROTO(struct rvt_dev_info *rdi, - const char *msg), - TP_ARGS(rdi, msg), - TP_STRUCT__entry( - RDI_DEV_ENTRY(rdi) - __string(msg, msg) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); - ), - TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_qphash -DECLARE_EVENT_CLASS(rvt_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_tx - -#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } -#define show_wr_opcode(opcode) \ -__print_symbolic(opcode, \ - wr_opcode_name(RDMA_WRITE), \ - wr_opcode_name(RDMA_WRITE_WITH_IMM), \ - wr_opcode_name(SEND), \ - wr_opcode_name(SEND_WITH_IMM), \ - wr_opcode_name(RDMA_READ), \ - wr_opcode_name(ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ - wr_opcode_name(LSO), \ - wr_opcode_name(SEND_WITH_INV), \ - wr_opcode_name(RDMA_READ_WITH_INV), \ - wr_opcode_name(LOCAL_INV), \ - wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) - -#define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" - -TRACE_EVENT( - rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u64, wr_id) - __field(u32, qpn) - __field(u32, psn) - __field(u32, lpsn) - __field(u32, length) - __field(u32, opcode) - __field(u32, size) - __field(u32, avail) - __field(u32, head) - __field(u32, last) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->wr_id = wqe->wr.wr_id; - __entry->qpn = qp->ibqp.qp_num; - __entry->psn = wqe->psn; - __entry->lpsn = wqe->lpsn; - __entry->length = wqe->length; - __entry->opcode = wqe->wr.opcode; - __entry->size = qp->s_size; - __entry->avail = qp->s_avail; - __entry->head = qp->s_head; - __entry->last = qp->s_last; - ), - TP_printk( - POS_PRN, - __get_str(dev), - __entry->wr_id, - __entry->qpn, - __entry->psn, - __entry->lpsn, - __entry->length, - __entry->opcode, show_wr_opcode(__entry->opcode), - __entry->size, - __entry->avail, - __entry->head, - __entry->last - ) -); - -#endif /* __RDMAVT_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> +#include "trace_rvt.h" +#include "trace_qp.h" +#include "trace_tx.h" +#include "trace_mr.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h new file mode 100644 index 000000000000..3318a6c36373 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -0,0 +1,112 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_MR_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_mr.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_mr +DECLARE_EVENT_CLASS( + rvt_mr_template, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) + __field(void *, vaddr) + __field(struct page *, page) + __field(size_t, len) + __field(u32, lkey) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); + __entry->vaddr = v; + __entry->page = virt_to_page(v); + __entry->m = m; + __entry->n = n; + __entry->len = len; + ), + TP_printk( + "[%s] vaddr %p page %p m %u n %u len %ld", + __get_str(dev), + __entry->vaddr, + __entry->page, + __entry->m, + __entry->n, + __entry->len + ) +); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_page_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_fmr_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_user_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +#endif /* __RVT_TRACE_MR_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mr +#include <trace/define_trace.h> diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h new file mode 100644 index 000000000000..4c77a3119bda --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_QP_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qp + +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + + +#endif /* __RVT_TRACE_QP_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_qp +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h new file mode 100644 index 000000000000..746f33461d9a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -0,0 +1,81 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RVT_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RVT_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rvt +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h new file mode 100644 index 000000000000..0e03173662d8 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -0,0 +1,132 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_TX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + +#endif /* __RVT_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 6c5e29db88e3..cd27cbde7652 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -420,11 +420,12 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) (wqe->wr.send_flags & IB_SEND_SIGNALED) || (qp->req.state == QP_STATE_ERROR)) { make_send_cqe(qp, wqe, &cqe); + advance_consumer(qp->sq.queue); rxe_cq_post(qp->scq, &cqe, 0); + } else { + advance_consumer(qp->sq.queue); } - advance_consumer(qp->sq.queue); - /* * we completed something so let req run again * if it is trying to fence @@ -510,6 +511,8 @@ int rxe_completer(void *arg) struct rxe_pkt_info *pkt = NULL; enum comp_state state; + rxe_add_ref(qp); + if (!qp->valid) { while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); @@ -739,11 +742,13 @@ exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ + rxe_drop_ref(qp); return -EAGAIN; done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ + rxe_drop_ref(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 73849a5a91b3..efe4c6a35442 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -266,8 +266,6 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - atomic_inc(&qp->skb_out); - if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 1869152f1d23..d0faca294006 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -355,6 +355,9 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, size_t offset; u32 crc = crcp ? (*crcp) : 0; + if (length == 0) + return 0; + if (mem->type == RXE_MEM_TYPE_DMA) { u8 *src, *dest; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ffff5a54cb34..16967cdb45df 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -46,7 +46,7 @@ #include "rxe_loc.h" static LIST_HEAD(rxe_dev_list); -static spinlock_t dev_list_lock; /* spinlock for device list */ +static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */ struct rxe_dev *net_to_rxe(struct net_device *ndev) { @@ -455,6 +455,8 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EAGAIN; } + if (pkt->qp) + atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); return 0; @@ -659,8 +661,6 @@ struct notifier_block rxe_net_notifier = { int rxe_net_ipv4_init(void) { - spin_lock_init(&dev_list_lock); - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { @@ -676,8 +676,6 @@ int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6bac0717c540..d723947a8542 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -180,7 +180,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) size = BITS_TO_LONGS(max - min + 1) * sizeof(long); pool->table = kmalloc(size, GFP_KERNEL); if (!pool->table) { - pr_warn("no memory for bit table\n"); err = -ENOMEM; goto out; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46f062842a9a..252b4d637d45 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -391,16 +391,15 @@ int rxe_rcv(struct sk_buff *skb) payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { - char saddr[sizeof(struct in6_addr)]; - if (skb->protocol == htons(ETH_P_IPV6)) - sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI6c\n", + &ipv6_hdr(skb)->saddr); else if (skb->protocol == htons(ETH_P_IP)) - sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI4\n", + &ip_hdr(skb)->saddr); else - sprintf(saddr, "unknown"); + pr_warn_ratelimited("bad ICRC from unknown\n"); - pr_warn_ratelimited("bad ICRC from %s\n", saddr); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 22bd9630dcd9..73d4a97603a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,10 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; + + rxe_add_ref(qp); next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -697,6 +699,7 @@ next_wqe: wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; __rxe_do_task(&qp->comp.task); + rxe_drop_ref(qp); return 0; } payload = mtu; @@ -719,7 +722,7 @@ next_wqe: * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -727,7 +730,7 @@ next_wqe: qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); @@ -756,8 +759,7 @@ err: */ wqe->wr.send_flags |= IB_SEND_SIGNALED; __rxe_do_task(&qp->comp.task); - return -EAGAIN; - exit: + rxe_drop_ref(qp); return -EAGAIN; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index dd3d88adc003..7a36ec9dbc0c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -444,6 +444,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } + /* A zero-byte op is not required to set an addr or rkey. */ + if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + (pkt->mask & RXE_RETH_MASK) && + reth_len(pkt) == 0) { + return RESPST_EXECUTE; + } + va = qp->resp.va; rkey = qp->resp.rkey; resid = qp->resp.resid; @@ -680,9 +687,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, res->read.va_org = qp->resp.va; res->first_psn = req_pkt->psn; - res->last_psn = req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1; + + if (reth_len(req_pkt)) { + res->last_psn = (req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1) & BTH_PSN_MASK; + } else { + res->last_psn = res->first_psn; + } res->cur_psn = req_pkt->psn; res->read.resid = qp->resp.resid; @@ -742,7 +754,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } else { qp->resp.res = NULL; qp->resp.opcode = -1; - qp->resp.psn = res->cur_psn; + if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) + qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; } @@ -1132,6 +1145,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, pkt, skb_copy); if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + rxe_drop_ref(qp); kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; @@ -1198,6 +1212,8 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; + rxe_add_ref(qp); + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; if (!qp->valid) { @@ -1386,5 +1402,6 @@ int rxe_responder(void *arg) exit: ret = -EAGAIN; done: + rxe_drop_ref(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 2a6e3cd2d4e8..efc832a2d7c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -169,7 +169,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, } } - err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + err = rxe_queue_resize(q, &attr->max_wr, rcv_wqe_size(srq->rq.max_sge), srq->rq.queue->ip ? srq->rq.queue->ip->context : diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 1e19bf828a6e..d2a14a1bdc7f 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -121,6 +121,7 @@ int rxe_init_task(void *obj, struct rxe_task *task, task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); + task->destroyed = false; tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); @@ -132,11 +133,29 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { + unsigned long flags; + bool idle; + + /* + * Mark the task, then wait for it to finish. It might be + * running in a non-tasklet (direct call) context. + */ + task->destroyed = true; + + do { + spin_lock_irqsave(&task->state_lock, flags); + idle = (task->state == TASK_STATE_START); + spin_unlock_irqrestore(&task->state_lock, flags); + } while (!idle); + tasklet_kill(&task->tasklet); } void rxe_run_task(struct rxe_task *task, int sched) { + if (task->destroyed) + return; + if (sched) tasklet_schedule(&task->tasklet); else diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index d14aa6daed05..08ff42d451c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -54,6 +54,7 @@ struct rxe_task { int (*func)(void *arg); int ret; char name[16]; + bool destroyed; }; /* diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..beb7021ff18a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, return err; } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) + { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); @@ -564,7 +566,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, if (udata) { if (udata->inlen) { err = -EINVAL; - goto err1; + goto err2; } qp->is_user = 1; } @@ -573,12 +575,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); if (err) - goto err2; + goto err3; return &qp->ibqp; -err2: +err3: rxe_drop_index(qp); +err2: rxe_drop_ref(qp); err1: return ERR_PTR(err); @@ -1007,11 +1010,19 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); + unsigned long irq_flags; + int ret = 0; + spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - return 0; + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + ret = 1; + + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); + + return ret; } static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 339a1eecdfe3..096c4f6fbd65 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -357,11 +357,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int i; rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring); - if (!rx->rx_ring) { - printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", - priv->ca->name, ipoib_recvq_size); + if (!rx->rx_ring) return -ENOMEM; - } t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { @@ -1054,8 +1051,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", - priv->ca->name); attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } @@ -1134,7 +1129,6 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { - ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } @@ -1550,8 +1544,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { - printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", - priv->ca->name, ipoib_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 830fecb6934c..5038f9d2d753 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -416,11 +416,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC); - if (!qp_work) { - ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n", - __func__, priv->qp->qp_num); + if (!qp_work) return; - } INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work); qp_work->priv = priv; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c50794fb92db..3ce0765a05ab 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1619,11 +1619,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); - if (!priv->rx_ring) { - printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", - ca->name, ipoib_recvq_size); + if (!priv->rx_ring) goto out; - } priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1909dd252c94..fddff403d5d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; - if (ib_query_port(priv->ca, priv->port, &port_attr) || - port_attr.state != IB_PORT_ACTIVE) { + if (ib_query_port(priv->ca, priv->port, &port_attr)) { + ipoib_dbg(priv, "ib_query_port() failed\n"); + return; + } + if (port_attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", port_attr.state); return; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a4b791dfaa1d..8ae7a3beddb7 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -890,11 +890,14 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_ESTABLISHED: iser_connected_handler(cma_id, event->param.conn.private_data); break; + case RDMA_CM_EVENT_REJECTED: + iser_info("Connection rejected: %s\n", + rdma_reject_msg(cma_id, event->status)); + /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - case RDMA_CM_EVENT_REJECTED: iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6dd43f63238e..314e95516068 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -184,7 +184,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!isert_conn->rx_descs) - goto fail; + return -ENOMEM; rx_desc = isert_conn->rx_descs; @@ -213,9 +213,7 @@ dma_map_fail: } kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; -fail: isert_err("conn %p failed to allocate rx descriptors\n", isert_conn); - return -ENOMEM; } @@ -269,10 +267,8 @@ isert_alloc_comps(struct isert_device *device) device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), GFP_KERNEL); - if (!device->comps) { - isert_err("Unable to allocate completion contexts\n"); + if (!device->comps) return -ENOMEM; - } max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe); @@ -432,10 +428,8 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf), GFP_KERNEL); - if (!isert_conn->login_req_buf) { - isert_err("Unable to allocate isert_conn->login_buf\n"); + if (!isert_conn->login_req_buf) return -ENOMEM; - } isert_conn->login_req_dma = ib_dma_map_single(ib_dev, isert_conn->login_req_buf, @@ -795,6 +789,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) */ return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + isert_info("Connection rejected: %s\n", + rdma_reject_msg(cma_id, event->status)); case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: ret = isert_connect_error(cma_id); @@ -1276,11 +1272,8 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd if (payload_length) { text_in = kzalloc(payload_length, GFP_KERNEL); - if (!text_in) { - isert_err("Unable to allocate text_in of payload_length: %u\n", - payload_length); + if (!text_in) return -ENOMEM; - } } cmd->text_in_ptr = text_in; @@ -1851,6 +1844,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, (void *)cmd->sense_buffer, pdu_len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = pdu_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; @@ -1978,6 +1973,8 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, (void *)cmd->buf_ptr, ISCSI_HDR_LEN, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = ISCSI_HDR_LEN; tx_dsg->addr = isert_cmd->pdu_buf_dma; tx_dsg->length = ISCSI_HDR_LEN; @@ -2018,6 +2015,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, txt_rsp_buf, txt_rsp_len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma)) + return -ENOMEM; isert_cmd->pdu_buf_len = txt_rsp_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; @@ -2307,10 +2306,9 @@ isert_setup_np(struct iscsi_np *np, int ret; isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL); - if (!isert_np) { - isert_err("Unable to allocate struct isert_np\n"); + if (!isert_np) return -ENOMEM; - } + sema_init(&isert_np->sem, 0); mutex_init(&isert_np->mutex); INIT_LIST_HEAD(&isert_np->accepted); @@ -2651,7 +2649,6 @@ static int __init isert_init(void) WQ_UNBOUND | WQ_HIGHPRI, 0); if (!isert_comp_wq) { isert_err("Unable to allocate isert_comp_wq\n"); - ret = -ENOMEM; return -ENOMEM; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d980fb458ad4..8ddc07123193 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -64,6 +64,11 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); MODULE_INFO(release_date, DRV_RELDATE); +#if !defined(CONFIG_DYNAMIC_DEBUG) +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) +#define DYNAMIC_DEBUG_BRANCH(descriptor) false +#endif + static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; @@ -384,6 +389,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); + if (ret == -ENOMEM) + pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", + dev_name(&device->dev)); goto destroy_pool; } d->mr = mr; @@ -1266,8 +1274,12 @@ static int srp_map_finish_fmr(struct srp_map_state *state, struct ib_pool_fmr *fmr; u64 io_addr = 0; - if (state->fmr.next >= state->fmr.end) + if (state->fmr.next >= state->fmr.end) { + shost_printk(KERN_ERR, ch->target->scsi_host, + PFX "Out of MRs (mr_per_cmd = %d)\n", + ch->target->mr_per_cmd); return -ENOMEM; + } WARN_ON_ONCE(!dev->use_fmr); @@ -1323,8 +1335,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, u32 rkey; int n, err; - if (state->fr.next >= state->fr.end) + if (state->fr.next >= state->fr.end) { + shost_printk(KERN_ERR, ch->target->scsi_host, + PFX "Out of MRs (mr_per_cmd = %d)\n", + ch->target->mr_per_cmd); return -ENOMEM; + } WARN_ON_ONCE(!dev->use_fast_reg); @@ -1556,7 +1572,6 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, return 0; } -#if defined(DYNAMIC_DATA_DEBUG) static void srp_check_mapping(struct srp_map_state *state, struct srp_rdma_ch *ch, struct srp_request *req, struct scatterlist *scat, int count) @@ -1580,7 +1595,6 @@ static void srp_check_mapping(struct srp_map_state *state, scsi_bufflen(req->scmnd), desc_len, mr_len, state->ndesc, state->nmdesc); } -#endif /** * srp_map_data() - map SCSI data buffer onto an SRP request @@ -1669,14 +1683,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, if (ret < 0) goto unmap; -#if defined(DYNAMIC_DEBUG) { DEFINE_DYNAMIC_DEBUG_METADATA(ddm, "Memory mapping consistency check"); - if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT)) + if (DYNAMIC_DEBUG_BRANCH(ddm)) srp_check_mapping(&state, ch, req, scat, count); } -#endif /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this @@ -3287,7 +3299,9 @@ static ssize_t srp_create_target(struct device *dev, */ scsi_host_get(target->scsi_host); - mutex_lock(&host->add_target_mutex); + ret = mutex_lock_interruptible(&host->add_target_mutex); + if (ret < 0) + goto put; ret = srp_parse_options(buf, target); if (ret) @@ -3443,6 +3457,7 @@ connected: out: mutex_unlock(&host->add_target_mutex); +put: scsi_host_put(target->scsi_host); if (ret < 0) scsi_host_put(target->scsi_host); @@ -3526,6 +3541,7 @@ free_host: static void srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; + struct ib_device_attr *attr = &device->attrs; struct srp_host *host; int mr_page_shift, p; u64 max_pages_per_mr; @@ -3540,25 +3556,25 @@ static void srp_add_one(struct ib_device *device) * minimum of 4096 bytes. We're unlikely to build large sglists * out of smaller entries. */ - mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1); + mr_page_shift = max(12, ffs(attr->page_size_cap) - 1); srp_dev->mr_page_size = 1 << mr_page_shift; srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); - max_pages_per_mr = device->attrs.max_mr_size; + max_pages_per_mr = attr->max_mr_size; do_div(max_pages_per_mr, srp_dev->mr_page_size); pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, - device->attrs.max_mr_size, srp_dev->mr_page_size, + attr->max_mr_size, srp_dev->mr_page_size, max_pages_per_mr, SRP_MAX_PAGES_PER_MR); srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && device->map_phys_fmr && device->unmap_fmr); - srp_dev->has_fr = (device->attrs.device_cap_flags & + srp_dev->has_fr = (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { dev_warn(&device->dev, "neither FMR nor FR is supported\n"); } else if (!never_register && - device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) { + attr->max_mr_size >= 2 * srp_dev->mr_page_size) { srp_dev->use_fast_reg = (srp_dev->has_fr && (!srp_dev->has_fmr || prefer_fr)); srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; @@ -3571,13 +3587,13 @@ static void srp_add_one(struct ib_device *device) if (srp_dev->use_fast_reg) { srp_dev->max_pages_per_mr = min_t(u32, srp_dev->max_pages_per_mr, - device->attrs.max_fast_reg_page_list_len); + attr->max_fast_reg_page_list_len); } srp_dev->mr_max_size = srp_dev->mr_page_size * srp_dev->max_pages_per_mr; pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", - device->name, mr_page_shift, device->attrs.max_mr_size, - device->attrs.max_fast_reg_page_list_len, + device->name, mr_page_shift, attr->max_mr_size, + attr->max_fast_reg_page_list_len, srp_dev->max_pages_per_mr, srp_dev->mr_max_size); INIT_LIST_HEAD(&srp_dev->dev_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0b1f69ed2e92..d21ba9d857c3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1840,7 +1840,6 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, struct srpt_rdma_ch *ch, *tmp_ch; u32 it_iu_len; int i, ret = 0; - unsigned char *p; WARN_ON_ONCE(irqs_disabled()); @@ -1994,21 +1993,18 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, be64_to_cpu(*(__be64 *)(ch->i_port_id + 8))); pr_debug("registering session %s\n", ch->sess_name); - p = &ch->sess_name[0]; -try_again: ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0, - TARGET_PROT_NORMAL, p, ch, NULL); + TARGET_PROT_NORMAL, ch->sess_name, ch, + NULL); + /* Retry without leading "0x" */ + if (IS_ERR(ch->sess)) + ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0, + TARGET_PROT_NORMAL, + ch->sess_name + 2, ch, NULL); if (IS_ERR(ch->sess)) { - pr_info("Rejected login because no ACL has been" - " configured yet for initiator %s.\n", p); - /* - * XXX: Hack to retry of ch->i_port_id without leading '0x' - */ - if (p == &ch->sess_name[0]) { - p += 2; - goto try_again; - } + pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n", + ch->sess_name); rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ? SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); |