diff options
Diffstat (limited to 'drivers/infiniband')
179 files changed, 4378 insertions, 13187 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b44b1c322ec8..ade86388434f 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -83,7 +83,6 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 09881bd5f12d..9a8871e21545 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o counters.o + nldev.o restrack.o counters.o ib_core_uverbs.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 00fb3eacda19..d535995711c3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -819,22 +819,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { int i; - bool deleted = false; if (!table) return; mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (is_gid_entry_valid(table->data_vec[i])) { + if (is_gid_entry_valid(table->data_vec[i])) del_gid(ib_dev, port, table, i); - deleted = true; - } } mutex_unlock(&table->lock); - - if (deleted) - dispatch_gid_change_event(ib_dev, port); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..455b3659d84b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,36 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #include <linux/completion.h> @@ -246,7 +220,7 @@ struct cm_work { }; struct cm_timewait_info { - struct cm_work work; /* Must be first. */ + struct cm_work work; struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; @@ -263,7 +237,7 @@ struct cm_id_private { struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; - atomic_t refcount; + refcount_t refcount; /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. */ int listen_sharecount; @@ -308,7 +282,7 @@ static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { - if (atomic_dec_and_test(&cm_id_priv->refcount)) + if (refcount_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } @@ -365,7 +339,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m->ah = ah; m->retries = cm_id_priv->max_cm_retries; - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; @@ -626,7 +600,7 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } @@ -883,7 +857,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, INIT_LIST_HEAD(&cm_id_priv->prim_list); INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); - atomic_set(&cm_id_priv->refcount, 1); + refcount_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: @@ -1230,7 +1204,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, spin_unlock_irqrestore(&cm.lock, flags); return ERR_PTR(-EINVAL); } - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); ++cm_id_priv->listen_sharecount; spin_unlock_irqrestore(&cm.lock, flags); @@ -1525,14 +1499,6 @@ static int cm_issue_rej(struct cm_port *port, return ret; } -static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, - __be32 local_qpn, __be32 remote_qpn) -{ - return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || - ((local_ca_guid == remote_ca_guid) && - (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); -} - static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) { return ((req_msg->alt_local_lid) || @@ -1895,8 +1861,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, NULL, 0); goto out; } - atomic_inc(&listen_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&listen_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); @@ -2052,7 +2018,7 @@ static int cm_req_handler(struct cm_work *work) return 0; rejected: - atomic_dec(&cm_id_priv->refcount); + refcount_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); free_timeinfo: kfree(cm_id_priv->timewait_info); @@ -2826,7 +2792,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) cm_local_id(timewait_info->work.local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } @@ -3434,7 +3400,7 @@ static int cm_timewait_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; int ret; - timewait_info = (struct cm_timewait_info *)work; + timewait_info = container_of(work, struct cm_timewait_info, work); spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); @@ -3596,8 +3562,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } - atomic_inc(&cur_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cur_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; @@ -4399,6 +4365,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4374,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4428,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 3d16d614aff6..92d7260ac913 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,37 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING the madirectory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use source and binary forms, with or - * withmodification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retathe above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ -#if !defined(CM_MSGS_H) +#ifndef CM_MSGS_H #define CM_MSGS_H #include <rdma/ib_mad.h> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..25f2b70fd8ef 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/completion.h> @@ -2396,9 +2369,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); @@ -2530,7 +2504,9 @@ EXPORT_SYMBOL(rdma_set_service_type); * This function should be called before rdma_connect() on active side, * and on passive side before rdma_accept(). It is applicable to primary * path only. The timeout will affect the local side of the QP, it is not - * negotiated with remote side and zero disables the timer. + * negotiated with remote side and zero disables the timer. In case it is + * set before rdma_resolve_route, the value will also be used to determine + * PacketLifeTime for RoCE. * * Return: 0 for success */ @@ -2827,22 +2803,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv) return 0; } -static int iboe_tos_to_sl(struct net_device *ndev, int tos) +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) { - int prio; struct net_device *dev; - prio = rt_tos2priority(tos); - dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + dev = vlan_dev_real_dev(vlan_ndev); if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); -#if IS_ENABLED(CONFIG_VLAN_8021Q) + return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} + +struct iboe_prio_tc_map { + int input_prio; + int output_tc; + bool found; +}; + +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +{ + struct iboe_prio_tc_map *map = data; + + if (is_vlan_dev(dev)) + map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); + else if (dev->num_tc) + map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); + else + map->output_tc = 0; + /* We are interested only in first level VLAN device, so always + * return 1 to stop iterating over next level devices. + */ + map->found = true; + return 1; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + struct iboe_prio_tc_map prio_tc_map = {}; + int prio = rt_tos2priority(tos); + + /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) - return (vlan_dev_get_egress_qos_mask(ndev, prio) & - VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; -#endif - return 0; + return get_vlan_ndev_tc(ndev, prio); + + prio_tc_map.input_prio = prio; + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(ndev, + get_lower_vlan_dev_tc, + &prio_tc_map); + rcu_read_unlock(); + /* If map is found from lower device, use it; Otherwise + * continue with the current netdevice to get priority to tc map. + */ + if (prio_tc_map.found) + return prio_tc_map.output_tc; + else if (ndev->num_tc) + return netdev_get_prio_tc_map(ndev, prio); + else + return 0; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) @@ -2896,7 +2915,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + /* In case ACK timeout is set, use this value to calculate + * PacketLifeTime. As per IBTA 12.7.34, + * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). + * Assuming a negligible local ACK delay, we can use + * PacketLifeTime = local ACK timeout/2 + * as a reasonable approximation for RoCE networks. + */ + route->path_rec->packet_life_time = id_priv->timeout_set ? + id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3a8b0911c3bc..3645e092e1c7 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -199,6 +199,7 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); +void rdma_nl_init(void); void rdma_nl_exit(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, @@ -387,4 +388,15 @@ int ib_device_set_netns_put(struct sk_buff *skb, int rdma_nl_net_init(struct rdma_dev_net *rnet); void rdma_nl_net_exit(struct rdma_dev_net *rnet); + +struct rdma_umap_priv { + struct vm_area_struct *vma; + struct list_head list; + struct rdma_user_mmap_entry *entry; +}; + +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 680ad27f497d..8434ec082c3a 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,11 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if (!rdma_is_visible_in_pid_ns(&qp->res)) - return false; - - /* Ensure that counter belongs to the right PID */ - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) + /* + * Ensure that counter belongs to the right PID. This operation can + * race with user space which kills the process and leaves QP and + * counters orphans. + * + * It is not a big deal because exitted task will leave both QP and + * counter in the same bucket of zombie process. Just ensure that + * process is still alive before procedding. + * + */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || + !task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) @@ -229,9 +236,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; @@ -412,9 +416,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) - goto err; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; @@ -445,11 +446,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) { - rdma_restrack_put(res); - return NULL; - } - counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); @@ -463,10 +459,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; + port_counter = &dev->port_data[port].port_counter; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; @@ -503,6 +504,7 @@ err: int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, u32 qp_num, u32 *counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; @@ -510,9 +512,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, if (!rdma_is_port_valid(dev, port)) return -EINVAL; - if (!dev->port_data[port].port_counter.hstats) + port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) return -EOPNOTSUPP; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..84dd74fe13b8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -128,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); /** - * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * rdma_dev_access_netns() - Return whether an rdma device can be accessed * from a specified net namespace or not. - * @device: Pointer to rdma device which needs to be checked + * @dev: Pointer to rdma device which needs to be checked * @net: Pointer to net namesapce for which access to be checked * - * rdma_dev_access_netns() - Return whether a rdma device can be accessed - * from a specified net namespace or not. When - * rdma device is in shared mode, it ignores the - * net namespace. When rdma device is exclusive - * to a net namespace, rdma device net namespace is - * checked against the specified one. + * When the rdma device is in shared mode, it ignores the net namespace. + * When the rdma device is exclusive to a net namespace, rdma device net + * namespace is checked against the specified one. */ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) { @@ -1199,9 +1196,21 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } - /* Setup default max segment size for all IB devices */ - dma_set_max_seg_size(device->dma_device, SZ_2G); + if (!device->dev.dma_parms) { + if (parent) { + /* + * The caller did not provide DMA parameters, so + * 'parent' probably represents a PCI device. The PCI + * core sets the maximum segment size to 64 + * KB. Increase this parameter to 2 GB. + */ + device->dev.dma_parms = parent->dma_parms; + dma_set_max_seg_size(device->dma_device, SZ_2G); + } else { + WARN_ON_ONCE(true); + } + } } /* @@ -1317,7 +1326,9 @@ out: /** * ib_register_device - Register an IB device with IB core - * @device:Device to register + * @device: Device to register + * @name: unique string device name. This may include a '%' which will + * cause a unique index to be added to the passed device name. * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a @@ -1444,7 +1455,7 @@ out: /** * ib_unregister_device - Unregister an IB device - * @device: The device to unregister + * @ib_dev: The device to unregister * * Unregister an IB device. All clients will receive a remove callback. * @@ -1466,7 +1477,7 @@ EXPORT_SYMBOL(ib_unregister_device); /** * ib_unregister_device_and_put - Unregister a device while holding a 'get' - * device: The device to unregister + * @ib_dev: The device to unregister * * This is the same as ib_unregister_device(), except it includes an internal * ib_device_put() that should match a 'get' obtained by the caller. @@ -1536,7 +1547,7 @@ static void ib_unregister_work(struct work_struct *work) /** * ib_unregister_device_queued - Unregister a device using a work queue - * device: The device to unregister + * @ib_dev: The device to unregister * * This schedules an asynchronous unregistration using a WQ for the device. A * driver should use this to avoid holding locks while doing unregistration, @@ -1987,8 +1998,6 @@ static int iw_query_port(struct ib_device *device, if (!netdev) return -ENODEV; - dev_put(netdev); - port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); @@ -1996,19 +2005,22 @@ static int iw_query_port(struct ib_device *device, port_attr->state = IB_PORT_DOWN; port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else { - inetdev = in_dev_get(netdev); + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); if (inetdev && inetdev->ifa_list) { port_attr->state = IB_PORT_ACTIVE; port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - in_dev_put(inetdev); } else { port_attr->state = IB_PORT_INIT; port_attr->phys_state = IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } + + rcu_read_unlock(); } + dev_put(netdev); err = device->ops.query_port(device, port_num, port_attr); if (err) return err; @@ -2365,7 +2377,7 @@ int ib_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { if (!device->ops.modify_device) - return -ENOSYS; + return -EOPNOTSUPP; return device->ops.modify_device(device, device_modify_mask, device_modify); @@ -2396,8 +2408,12 @@ int ib_modify_port(struct ib_device *device, rc = device->ops.modify_port(device, port_num, port_modify_mask, port_modify); + else if (rdma_protocol_roce(device, port_num) && + ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || + (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) + rc = 0; else - rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; + rc = -EOPNOTSUPP; return rc; } EXPORT_SYMBOL(ib_modify_port); @@ -2606,6 +2622,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); + SET_DEVICE_OP(dev_ops, fill_stat_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); @@ -2614,9 +2631,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_guid); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); - SET_DEVICE_OP(dev_ops, invalidate_range); SET_DEVICE_OP(dev_ops, iw_accept); SET_DEVICE_OP(dev_ops, iw_add_ref); SET_DEVICE_OP(dev_ops, iw_connect); @@ -2629,6 +2646,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); @@ -2715,6 +2733,8 @@ static int __init ib_core_init(void) goto err_comp_unbound; } + rdma_nl_init(); + ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c new file mode 100644 index 000000000000..f509c478b469 --- /dev/null +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019 Marvell. All rights reserved. + */ +#include <linux/xarray.h> +#include "uverbs.h" +#include "core_priv.h" + +/** + * rdma_umap_priv_init() - Initialize the private data of a vma + * + * @priv: The already allocated private data + * @vma: The vm area struct that needs private data + * @entry: entry into the mmap_xa that needs to be linked with + * this vma + * + * Each time we map IO memory into user space this keeps track of the + * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space + * to point to the zero page and allow the hot unplug to proceed. + * + * This is necessary for cases like PCI physical hot unplug as the actual BAR + * memory may vanish after this and access to it from userspace could MCE. + * + * RDMA drivers supporting disassociation must have their user space designed + * to cope in some way with their IO pages going to the zero page. + * + */ +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + + priv->vma = vma; + if (entry) { + kref_get(&entry->ref); + priv->entry = entry; + } + vma->vm_private_data = priv; + /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ + + mutex_lock(&ufile->umap_lock); + list_add(&priv->list, &ufile->umaps); + mutex_unlock(&ufile->umap_lock); +} +EXPORT_SYMBOL(rdma_umap_priv_init); + +/** + * rdma_user_mmap_io() - Map IO memory into a process + * + * @ucontext: associated user context + * @vma: the vma related to the current mmap call + * @pfn: pfn to map + * @size: size to map + * @prot: pgprot to use in remap call + * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL + * if mmap_entry is not used by the driver + * + * This is to be called by drivers as part of their mmap() functions if they + * wish to send something like PCI-E BAR memory to userspace. + * + * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on + * success. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + struct rdma_umap_priv *priv; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (vma->vm_end - vma->vm_start != size) + return -EINVAL; + + /* Driver is using this wrong, must be called by ib_uverbs_mmap */ + if (WARN_ON(!vma->vm_file || + vma->vm_file->private_data != ufile)) + return -EINVAL; + lockdep_assert_held(&ufile->device->disassociate_srcu); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + vma->vm_page_prot = prot; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { + kfree(priv); + return -EAGAIN; + } + + rdma_umap_priv_init(priv, vma, entry); + return 0; +} +EXPORT_SYMBOL(rdma_user_mmap_io); + +/** + * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @pgoff: The mmap offset >> PAGE_SHIFT + * + * This function is called when a user tries to mmap with an offset (returned + * by rdma_user_mmap_get_offset()) it initially received from the driver. The + * rdma_user_mmap_entry was created by the function + * rdma_user_mmap_entry_insert(). This function increases the refcnt of the + * entry so that it won't be deleted from the xarray in the meantime. + * + * Return an reference to an entry if exists or NULL if there is no + * match. rdma_user_mmap_entry_put() must be called to put the reference. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff) +{ + struct rdma_user_mmap_entry *entry; + + if (pgoff > U32_MAX) + return NULL; + + xa_lock(&ucontext->mmap_xa); + + entry = xa_load(&ucontext->mmap_xa, pgoff); + + /* + * If refcount is zero, entry is already being deleted, driver_removed + * indicates that the no further mmaps are possible and we waiting for + * the active VMAs to be closed. + */ + if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || + !kref_get_unless_zero(&entry->ref)) + goto err; + + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n", + pgoff, entry->npages); + + return entry; + +err: + xa_unlock(&ucontext->mmap_xa); + return NULL; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); + +/** + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @vma: the vma being mmap'd into + * + * This function is like rdma_user_mmap_entry_get_pgoff() except that it also + * checks that the VMA is correct. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma) +{ + struct rdma_user_mmap_entry *entry; + + if (!(vma->vm_flags & VM_SHARED)) + return NULL; + entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); + if (!entry) + return NULL; + if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { + rdma_user_mmap_entry_put(entry); + return NULL; + } + return entry; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get); + +static void rdma_user_mmap_entry_free(struct kref *kref) +{ + struct rdma_user_mmap_entry *entry = + container_of(kref, struct rdma_user_mmap_entry, ref); + struct ib_ucontext *ucontext = entry->ucontext; + unsigned long i; + + /* + * Erase all entries occupied by this single entry, this is deferred + * until all VMA are closed so that the mmap offsets remain unique. + */ + xa_lock(&ucontext->mmap_xa); + for (i = 0; i < entry->npages; i++) + __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n", + entry->start_pgoff, entry->npages); + + if (ucontext->device->ops.mmap_free) + ucontext->device->ops.mmap_free(entry); +} + +/** + * rdma_user_mmap_entry_put() - Drop reference to the mmap entry + * + * @entry: an entry in the mmap_xa + * + * This function is called when the mapping is closed if it was + * an io mapping or when the driver is done with the entry for + * some other reason. + * Should be called after rdma_user_mmap_entry_get was called + * and entry is no longer needed. This function will erase the + * entry and free it if its refcnt reaches zero. + */ +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) +{ + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_put); + +/** + * rdma_user_mmap_entry_remove() - Drop reference to entry and + * mark it as unmmapable + * + * @entry: the entry to insert into the mmap_xa + * + * Drivers can call this to prevent userspace from creating more mappings for + * entry, however existing mmaps continue to exist and ops->mmap_free() will + * not be called until all user mmaps are destroyed. + */ +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) +{ + if (!entry) + return; + + entry->driver_removed = true; + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_remove); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for implementing their mmap syscall A database of mmap offsets is + * handled in the core and helper functions are provided to insert entries + * into the database and extract entries when the user calls mmap with the + * given offset. The function allocates a unique page offset that should be + * provided to user, the user will use the offset to retrieve information such + * as address to be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + XA_STATE(xas, &ucontext->mmap_xa, 0); + u32 xa_first, xa_last, npages; + int err; + u32 i; + + if (!entry) + return -EINVAL; + + kref_init(&entry->ref); + entry->ucontext = ucontext; + + /* + * We want the whole allocation to be done without interruption from a + * different thread. The allocation requires finding a free range and + * storing. During the xa_insert the lock could be released, possibly + * allowing another thread to choose the same range. + */ + mutex_lock(&ufile->umap_lock); + + xa_lock(&ucontext->mmap_xa); + + /* We want to find an empty range */ + npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); + entry->npages = npages; + while (true) { + /* First find an empty index */ + xas_find_marked(&xas, U32_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + goto err_unlock; + + xa_first = xas.xa_index; + + /* Is there enough room to have the range? */ + if (check_add_overflow(xa_first, npages, &xa_last)) + goto err_unlock; + + /* + * Now look for the next present entry. If an entry doesn't + * exist, we found an empty range and can proceed. + */ + xas_next_entry(&xas, xa_last - 1); + if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last) + break; + } + + for (i = xa_first; i < xa_last; i++) { + err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); + if (err) + goto err_undo; + } + + /* + * Internally the kernel uses a page offset, in libc this is a byte + * offset. Drivers should not return pgoff to userspace. + */ + entry->start_pgoff = xa_first; + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n", + entry->start_pgoff, npages); + + return 0; + +err_undo: + for (; i > xa_first; i--) + __xa_erase(&ucontext->mmap_xa, i - 1); + +err_unlock: + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + return -ENOMEM; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 72141c5b7c95..ade71823370f 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect); static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; + switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; @@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ - (void)iwcm_modify_qp_err(cm_id_priv->qp); + (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: @@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) BUG(); break; } - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); @@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id, BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } @@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; - struct ib_qp *qp; + struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; @@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp = NULL; unsigned long flags; int ret; @@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); + qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) @@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp; unsigned long flags; - int ret = 0; + int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); - spin_lock_irqsave(&cm_id_priv->lock, flags); + notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; @@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); + if (notify_event) + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 7e2bcc72f66c..1bf87d9fd0bd 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void); /** * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * @a_sockaddr: first sockaddr to compare + * @b_sockaddr: second sockaddr to compare * - * Returns 0 if they are holding the same ip/tcp address info, + * Return: 0 if they are holding the same ip/tcp address info, * otherwise returns 1 */ int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, @@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); * iwpm_send_hello - Send hello response to iwpmd * * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper * @abi_version: The kernel's abi_version * * Returns 0 on success or a negative error code diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..c54db13fa9b0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, /* No GRH for DR SMP */ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { @@ -2340,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, - &recv->grh, (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 81dbd5f41bed..8cd31ef25eff 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -42,9 +42,12 @@ #include <linux/module.h> #include "core_priv.h" -static DEFINE_MUTEX(rdma_nl_mutex); static struct { - const struct rdma_nl_cbs *cb_table; + const struct rdma_nl_cbs *cb_table; + /* Synchronizes between ongoing netlink commands and netlink client + * unregistration. + */ + struct rw_semaphore sem; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; bool rdma_nl_chk_listeners(unsigned int group) @@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) return (op < max_num_ops[type]) ? true : false; } -static bool -is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op) +static const struct rdma_nl_cbs * +get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) { const struct rdma_nl_cbs *cb_table; - if (!is_nl_msg_valid(type, op)) - return false; - /* * Currently only NLDEV client is supporting netlink commands in * non init_net net namespace. */ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV) - return false; + return NULL; - if (!rdma_nl_types[type].cb_table) { - mutex_unlock(&rdma_nl_mutex); - request_module("rdma-netlink-subsys-%d", type); - mutex_lock(&rdma_nl_mutex); - } + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + if (!cb_table) { + /* + * Didn't get valid reference of the table, attempt module + * load once. + */ + up_read(&rdma_nl_types[type].sem); - cb_table = rdma_nl_types[type].cb_table; + request_module("rdma-netlink-subsys-%d", type); + down_read(&rdma_nl_types[type].sem); + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + } if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) - return false; - return true; + return NULL; + return cb_table; } void rdma_nl_register(unsigned int index, const struct rdma_nl_cbs cb_table[]) { - mutex_lock(&rdma_nl_mutex); - if (!is_nl_msg_valid(index, 0)) { - /* - * All clients are not interesting in success/failure of - * this call. They want to see the print to error log and - * continue their initialization. Print warning for them, - * because it is programmer's error to be here. - */ - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The not-valid %u index was supplied to RDMA netlink\n", - index); + if (WARN_ON(!is_nl_msg_valid(index, 0)) || + WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) return; - } - - if (rdma_nl_types[index].cb_table) { - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The %u index is already registered in RDMA netlink\n", - index); - return; - } - rdma_nl_types[index].cb_table = cb_table; - mutex_unlock(&rdma_nl_mutex); + /* Pairs with the READ_ONCE in is_nl_valid() */ + smp_store_release(&rdma_nl_types[index].cb_table, cb_table); } EXPORT_SYMBOL(rdma_nl_register); void rdma_nl_unregister(unsigned int index) { - mutex_lock(&rdma_nl_mutex); + down_write(&rdma_nl_types[index].sem); rdma_nl_types[index].cb_table = NULL; - mutex_unlock(&rdma_nl_mutex); + up_write(&rdma_nl_types[index].sem); } EXPORT_SYMBOL(rdma_nl_unregister); @@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); const struct rdma_nl_cbs *cb_table; + int err = -EINVAL; - if (!is_nl_valid(skb, index, op)) + if (!is_nl_msg_valid(index, op)) return -EINVAL; - cb_table = rdma_nl_types[index].cb_table; + down_read(&rdma_nl_types[index].sem); + cb_table = get_cb_table(skb, index, op); + if (!cb_table) + goto done; if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; + !netlink_capable(skb, CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } /* * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't @@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, */ if (index == RDMA_NL_LS) { if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - return -EINVAL; + err = cb_table[op].doit(skb, nlh, extack); + goto done; } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { @@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = cb_table[op].dump, }; if (c.dump) - return netlink_dump_start(skb->sk, skb, nlh, &c); - return -EINVAL; + err = netlink_dump_start(skb->sk, skb, nlh, &c); + goto done; } if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - - return 0; + err = cb_table[op].doit(skb, nlh, extack); +done: + up_read(&rdma_nl_types[index].sem); + return err; } /* @@ -263,9 +256,7 @@ skip: static void rdma_nl_rcv(struct sk_buff *skb) { - mutex_lock(&rdma_nl_mutex); rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg); - mutex_unlock(&rdma_nl_mutex); } int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) @@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, } EXPORT_SYMBOL(rdma_nl_multicast); +void rdma_nl_init(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + init_rwsem(&rdma_nl_types[idx].sem); +} + void rdma_nl_exit(void) { int idx; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..cbf6041a5d4a 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,6 +42,9 @@ #include "cma_priv.h" #include "restrack.h" +typedef int (*res_fill_func_t)(struct sk_buff*, bool, + struct rdma_restrack_entry*, uint32_t); + /* * Sort array elements by the netlink attribute name */ @@ -180,6 +183,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, return 0; } +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str) +{ + if (put_driver_name_print_type(msg, name, + RDMA_NLDEV_PRINT_TYPE_UNSPEC)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) + return -EMSGSIZE; + + return 0; +} +EXPORT_SYMBOL(rdma_nl_put_driver_string); + int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, @@ -399,20 +415,34 @@ err: static int fill_res_name_pid(struct sk_buff *msg, struct rdma_restrack_entry *res) { + int err = 0; + /* * For user resources, user is should read /proc/PID/comm to get the * name of the task file. */ if (rdma_is_kernel_res(res)) { - if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, - res->kern_name)) - return -EMSGSIZE; + err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + res->kern_name); } else { - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, - task_pid_vnr(res->task))) - return -EMSGSIZE; + pid_t pid; + + pid = task_pid_vnr(res->task); + /* + * Task is dead and in zombie state. + * There is no need to print PID anymore. + */ + if (pid) + /* + * This part is racy, task can be killed and PID will + * be zero right here but it is ok, next query won't + * return PID. We don't promise real-time reflection + * of SW objects. + */ + err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); } - return 0; + + return err ? -EMSGSIZE : 0; } static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, @@ -423,6 +453,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } +static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_stat_entry) + return false; + return dev->ops.fill_stat_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -698,9 +736,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) continue; @@ -723,8 +758,8 @@ err: return ret; } -static int fill_stat_hwcounter_entry(struct sk_buff *msg, - const char *name, u64 value) +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value) { struct nlattr *entry_attr; @@ -746,6 +781,25 @@ err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } +EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); + +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) + goto err; + + if (fill_stat_entry(dev, msg, res)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) @@ -759,7 +813,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -778,7 +832,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, container_of(res, struct rdma_counter, res); if (port && port != counter->port) - return 0; + return -EAGAIN; /* Dump it even query failed */ rdma_counter_query_stats(counter); @@ -1117,8 +1171,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; u8 flags; @@ -1132,21 +1184,18 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .fill_res_func = fill_res_qp_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .fill_res_func = fill_res_cm_id_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .fill_res_func = fill_res_cq_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, @@ -1154,7 +1203,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .fill_res_func = fill_res_mr_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, @@ -1162,7 +1210,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .fill_res_func = fill_res_pd_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, @@ -1170,7 +1217,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .fill_res_func = fill_res_counter_entry, .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, @@ -1180,7 +1226,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1222,15 +1269,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - if (!rdma_is_visible_in_pid_ns(res)) { - ret = -ENOENT; - goto err_get; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1243,7 +1285,9 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); - ret = fe->fill_res_func(msg, has_cap_net_admin, res, port); + + ret = fill_func(msg, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) goto err_free; @@ -1263,7 +1307,8 @@ err: static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1334,9 +1379,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. */ xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - if (idx < start || !rdma_restrack_get(res)) goto next; @@ -1351,7 +1393,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto msg_full; } - ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); + ret = fill_func(skb, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) { @@ -1394,17 +1437,19 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); @@ -1787,10 +1832,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1799,13 +1840,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); @@ -1882,7 +1925,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { ret = -EMSGSIZE; goto err_table; } @@ -1991,7 +2034,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; @@ -2015,7 +2061,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, case RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ccf4d069c25c..6c72773faf29 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, rdma_restrack_del(&ucontext->res); ib_dev->ops.dealloc_ucontext(ucontext); + WARN_ON(!xa_empty(&ucontext->mmap_xa)); kfree(ucontext); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index a07665f7ef8c..62fbb0ae9cb4 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -116,11 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) { - if (!rdma_is_visible_in_pid_ns(e)) - continue; + xas_for_each(&xas, e, U32_MAX) cnt++; - } xa_unlock(&rt->xa); return cnt; } @@ -346,18 +343,3 @@ out: } } EXPORT_SYMBOL(rdma_restrack_del); - -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) -{ - /* - * 1. Kern resources should be visible in init - * namespace only - * 2. Present only resources visible in the current - * namespace - */ - if (rdma_is_kernel_res(res)) - return task_active_pid_ns(current) == &init_pid_ns; - - /* PID 0 means that resource is not found in current namespace */ - return task_pid_vnr(res->task); -} diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 7bd177cc0a61..d084e5f89849 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); void rdma_restrack_attach_task(struct rdma_restrack_entry *res, struct task_struct *task); -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5337393d4dfe..4fad732f9b3c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0); MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations"); /* - * Check if the device might use memory registration. This is currently only - * true for iWarp devices. In the future we can hopefully fine tune this based - * on HCA driver input. + * Report whether memory registration should be used. Memory registration must + * be used for iWarp devices because of iWARP-specific limitations. Memory + * registration is also enabled if registering memory might yield better + * performance than using multiple SGE entries, see rdma_rw_io_needs_mr() */ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) { if (rdma_protocol_iwarp(dev, port_num)) return true; + if (dev->attrs.max_sgl_rd) + return true; if (unlikely(rdma_rw_force_mr)) return true; return false; @@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) /* * Check if the device will use memory registration for this RW operation. - * We currently always use memory registrations for iWarp RDMA READs, and - * have a debug option to force usage of MRs. - * - * XXX: In the future we can hopefully fine tune this based on HCA driver - * input. + * For RDMA READs we must use MRs on iWarp and can optionally use them as an + * optimization otherwise. Additionally we have a debug option to force usage + * of MRs to help testing this code path. */ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, enum dma_data_direction dir, int dma_nents) { - if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) - return true; + if (dir == DMA_FROM_DEVICE) { + if (rdma_protocol_iwarp(dev, port_num)) + return true; + if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd) + return true; + } if (unlikely(rdma_rw_force_mr)) return true; return false; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 17fc2936c077..8917125ea16d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, * @port_num: Port on the specified device. * @rec: path record entry to use for ah attributes initialization. * @ah_attr: address handle attributes to initialization from path record. - * @sgid_attr: SGID attribute to consider during initialization. + * @gid_attr: SGID attribute to consider during initialization. * * When ib_init_ah_attr_from_path() returns success, * (a) for IB link layer it optionally contains a reference to SGID attribute diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..6eb6d2717ca5 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a50cedcef1f..087682e6969e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -481,8 +481,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (!dev->ops.process_mad) return -ENOSYS; - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; @@ -497,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, + in_mad, out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { @@ -1268,7 +1266,7 @@ static ssize_t node_desc_store(struct device *device, int ret; if (!dev->ops.modify_device) - return -EIO; + return -EOPNOTSUPP; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 24244a2f68cc..7a3b99597ead 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -185,10 +185,9 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned - * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync) + size_t size, int access) { struct ib_ucontext *context; struct ib_umem *umem; @@ -199,7 +198,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct mm_struct *mm; unsigned long npages; int ret; - unsigned long dma_attrs = 0; struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; @@ -211,9 +209,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!context) return ERR_PTR(-EIO); - if (dmasync) - dma_attrs |= DMA_ATTR_WRITE_BARRIER; - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. @@ -294,11 +289,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, sg_mark_end(sg); - umem->nmap = ib_dma_map_sg_attrs(context->device, + umem->nmap = ib_dma_map_sg(context->device, umem->sg_head.sgl, umem->sg_nents, - DMA_BIDIRECTIONAL, - dma_attrs); + DMA_BIDIRECTIONAL); if (!umem->nmap) { ret = -ENOMEM; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f67a30fda1ed..e42d44e501fd 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -48,197 +48,33 @@ #include "uverbs.h" -static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) +static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, + const struct mmu_interval_notifier_ops *ops) { - mutex_lock(&umem_odp->umem_mutex); - if (umem_odp->notifiers_count++ == 0) - /* - * Initialize the completion object for waiting on - * notifiers. Since notifier_count is zero, no one should be - * waiting right now. - */ - reinit_completion(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) -{ - mutex_lock(&umem_odp->umem_mutex); - /* - * This sequence increase will notify the QP page fault that the page - * that is going to be mapped in the spte could have been freed. - */ - ++umem_odp->notifiers_seq; - if (--umem_odp->notifiers_count == 0) - complete_all(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static void ib_umem_notifier_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - struct rb_node *node; - - down_read(&per_mm->umem_rwsem); - if (!per_mm->mn.users) - goto out; - - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - - /* - * Increase the number of notifiers running, to prevent any - * further fault handling on this MR. - */ - ib_umem_notifier_start_account(umem_odp); - complete_all(&umem_odp->notifier_completion); - umem_odp->umem.ibdev->ops.invalidate_range( - umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - } - -out: - up_read(&per_mm->umem_rwsem); -} - -static int invalidate_range_start_trampoline(struct ib_umem_odp *item, - u64 start, u64 end, void *cookie) -{ - ib_umem_notifier_start_account(item); - item->umem.ibdev->ops.invalidate_range(item, start, end); - return 0; -} - -static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - int rc; - - if (mmu_notifier_range_blockable(range)) - down_read(&per_mm->umem_rwsem); - else if (!down_read_trylock(&per_mm->umem_rwsem)) - return -EAGAIN; - - if (!per_mm->mn.users) { - up_read(&per_mm->umem_rwsem); - /* - * At this point users is permanently zero and visible to this - * CPU without a lock, that fact is relied on to skip the unlock - * in range_end. - */ - return 0; - } - - rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_start_trampoline, - mmu_notifier_range_blockable(range), - NULL); - if (rc) - up_read(&per_mm->umem_rwsem); - return rc; -} - -static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, - u64 end, void *cookie) -{ - ib_umem_notifier_end_account(item); - return 0; -} - -static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - if (unlikely(!per_mm->mn.users)) - return; - - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_end_trampoline, true, NULL); - up_read(&per_mm->umem_rwsem); -} - -static struct mmu_notifier *ib_umem_alloc_notifier(struct mm_struct *mm) -{ - struct ib_ucontext_per_mm *per_mm; - - per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); - if (!per_mm) - return ERR_PTR(-ENOMEM); - - per_mm->umem_tree = RB_ROOT_CACHED; - init_rwsem(&per_mm->umem_rwsem); - - WARN_ON(mm != current->mm); - rcu_read_lock(); - per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); - rcu_read_unlock(); - return &per_mm->mn; -} - -static void ib_umem_free_notifier(struct mmu_notifier *mn) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); - - put_pid(per_mm->tgid); - kfree(per_mm); -} - -static const struct mmu_notifier_ops ib_umem_notifiers = { - .release = ib_umem_notifier_release, - .invalidate_range_start = ib_umem_notifier_invalidate_range_start, - .invalidate_range_end = ib_umem_notifier_invalidate_range_end, - .alloc_notifier = ib_umem_alloc_notifier, - .free_notifier = ib_umem_free_notifier, -}; - -static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm; - struct mmu_notifier *mn; int ret; umem_odp->umem.is_odp = 1; + mutex_init(&umem_odp->umem_mutex); + if (!umem_odp->is_implicit_odp) { size_t page_size = 1UL << umem_odp->page_shift; + unsigned long start; + unsigned long end; size_t pages; - umem_odp->interval_tree.start = - ALIGN_DOWN(umem_odp->umem.address, page_size); + start = ALIGN_DOWN(umem_odp->umem.address, page_size); if (check_add_overflow(umem_odp->umem.address, (unsigned long)umem_odp->umem.length, - &umem_odp->interval_tree.last)) + &end)) return -EOVERFLOW; - umem_odp->interval_tree.last = - ALIGN(umem_odp->interval_tree.last, page_size); - if (unlikely(umem_odp->interval_tree.last < page_size)) + end = ALIGN(end, page_size); + if (unlikely(end < page_size)) return -EOVERFLOW; - pages = (umem_odp->interval_tree.last - - umem_odp->interval_tree.start) >> - umem_odp->page_shift; + pages = (end - start) >> umem_odp->page_shift; if (!pages) return -EINVAL; - /* - * Note that the representation of the intervals in the - * interval tree considers the ending point as contained in - * the interval. - */ - umem_odp->interval_tree.last--; - umem_odp->page_list = kvcalloc( pages, sizeof(*umem_odp->page_list), GFP_KERNEL); if (!umem_odp->page_list) @@ -250,26 +86,13 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp) ret = -ENOMEM; goto out_page_list; } - } - - mn = mmu_notifier_get(&ib_umem_notifiers, umem_odp->umem.owning_mm); - if (IS_ERR(mn)) { - ret = PTR_ERR(mn); - goto out_dma_list; - } - umem_odp->per_mm = per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - mutex_init(&umem_odp->umem_mutex); - init_completion(&umem_odp->notifier_completion); - - if (!umem_odp->is_implicit_odp) { - down_write(&per_mm->umem_rwsem); - interval_tree_insert(&umem_odp->interval_tree, - &per_mm->umem_tree); - up_write(&per_mm->umem_rwsem); + ret = mmu_interval_notifier_insert(&umem_odp->notifier, + umem_odp->umem.owning_mm, + start, end - start, ops); + if (ret) + goto out_dma_list; } - mmgrab(umem_odp->umem.owning_mm); return 0; @@ -305,8 +128,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, if (!context) return ERR_PTR(-EIO); - if (WARN_ON_ONCE(!context->device->ops.invalidate_range)) - return ERR_PTR(-EINVAL); umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); if (!umem_odp) @@ -318,8 +139,10 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, umem_odp->is_implicit_odp = 1; umem_odp->page_shift = PAGE_SHIFT; - ret = ib_init_umem_odp(umem_odp); + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, NULL); if (ret) { + put_pid(umem_odp->tgid); kfree(umem_odp); return ERR_PTR(ret); } @@ -336,8 +159,10 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); * @addr: The starting userspace VA * @size: The length of the userspace VA */ -struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root, - unsigned long addr, size_t size) +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops) { /* * Caller must ensure that root cannot be freed during the call to @@ -360,9 +185,12 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root, umem->writable = root->umem.writable; umem->owning_mm = root->umem.owning_mm; odp_data->page_shift = PAGE_SHIFT; + odp_data->notifier.ops = ops; - ret = ib_init_umem_odp(odp_data); + odp_data->tgid = get_pid(root->tgid); + ret = ib_init_umem_odp(odp_data, ops); if (ret) { + put_pid(odp_data->tgid); kfree(odp_data); return ERR_PTR(ret); } @@ -383,7 +211,8 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child); * conjunction with MMU notifiers. */ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access) + size_t size, int access, + const struct mmu_interval_notifier_ops *ops) { struct ib_umem_odp *umem_odp; struct ib_ucontext *context; @@ -398,8 +227,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, if (!context) return ERR_PTR(-EIO); - if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) || - WARN_ON_ONCE(!context->device->ops.invalidate_range)) + if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) return ERR_PTR(-EINVAL); umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); @@ -411,6 +239,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, umem_odp->umem.address = addr; umem_odp->umem.writable = ib_access_writable(access); umem_odp->umem.owning_mm = mm = current->mm; + umem_odp->notifier.ops = ops; umem_odp->page_shift = PAGE_SHIFT; if (access & IB_ACCESS_HUGETLB) { @@ -429,11 +258,14 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, up_read(&mm->mmap_sem); } - ret = ib_init_umem_odp(umem_odp); + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, ops); if (ret) - goto err_free; + goto err_put_pid; return umem_odp; +err_put_pid: + put_pid(umem_odp->tgid); err_free: kfree(umem_odp); return ERR_PTR(ret); @@ -442,8 +274,6 @@ EXPORT_SYMBOL(ib_umem_odp_get); void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - /* * Ensure that no more pages are mapped in the umem. * @@ -451,30 +281,15 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * that the hardware will not attempt to access the MR any more. */ if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); + mmu_interval_notifier_remove(&umem_odp->notifier); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); + put_pid(umem_odp->tgid); } - - down_write(&per_mm->umem_rwsem); - if (!umem_odp->is_implicit_odp) { - interval_tree_remove(&umem_odp->interval_tree, - &per_mm->umem_tree); - complete_all(&umem_odp->notifier_completion); - } - /* - * NOTE! mmu_notifier_unregister() can happen between a start/end - * callback, resulting in a missing end, and thus an unbalanced - * lock. This doesn't really matter to us since we are about to kfree - * the memory that holds the lock, however LOCKDEP doesn't like this. - * Thus we call the mmu_notifier_put under the rwsem and test the - * internal users count to reliably see if we are past this point. - */ - mmu_notifier_put(&per_mm->mn); - up_write(&per_mm->umem_rwsem); - - mmdrop(umem_odp->umem.owning_mm); kfree(umem_odp); } EXPORT_SYMBOL(ib_umem_odp_release); @@ -499,22 +314,16 @@ EXPORT_SYMBOL(ib_umem_odp_release); */ static int ib_umem_odp_map_dma_single_page( struct ib_umem_odp *umem_odp, - int page_index, + unsigned int page_index, struct page *page, u64 access_mask, unsigned long current_seq) { struct ib_device *dev = umem_odp->umem.ibdev; dma_addr_t dma_addr; - int remove_existing_mapping = 0; int ret = 0; - /* - * Note: we avoid writing if seq is different from the initial seq, to - * handle case of a racing notifier. This check also allows us to bail - * early if we have a notifier running in parallel with us. - */ - if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) { + if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) { ret = -EAGAIN; goto out; } @@ -532,28 +341,29 @@ static int ib_umem_odp_map_dma_single_page( } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { - pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem_odp->page_list[page_index], page); - /* Better remove the mapping now, to prevent any further - * damage. */ - remove_existing_mapping = 1; + /* + * This is a race here where we could have done: + * + * CPU0 CPU1 + * get_user_pages() + * invalidate() + * page_fault() + * mutex_lock(umem_mutex) + * page from GUP != page in ODP + * + * It should be prevented by the retry test above as reading + * the seq number should be reliable under the + * umem_mutex. Thus something is really not working right if + * things get here. + */ + WARN(true, + "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", + umem_odp->page_list[page_index], page); + ret = -EAGAIN; } out: put_user_page(page); - - if (remove_existing_mapping) { - ib_umem_notifier_start_account(umem_odp); - dev->ops.invalidate_range( - umem_odp, - ib_umem_start(umem_odp) + - (page_index << umem_odp->page_shift), - ib_umem_start(umem_odp) + - ((page_index + 1) << umem_odp->page_shift)); - ib_umem_notifier_end_account(umem_odp); - ret = -EAGAIN; - } - return ret; } @@ -616,7 +426,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * existing beyond the lifetime of the originating process.. Presumably * mmget_not_zero will fail in this case. */ - owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID); + owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID); if (!owning_process || !mmget_not_zero(owning_mm)) { ret = -EINVAL; goto out_put_task; @@ -719,6 +529,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; + lockdep_assert_held(&umem_odp->umem_mutex); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the @@ -726,7 +538,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -757,35 +568,5 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); - -/* @last is not a part of the interval. See comment for function - * node_last. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - bool blockable, - void *cookie) -{ - int ret_val = 0; - struct interval_tree_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = interval_tree_iter_first(root, start, last - 1); - node; node = next) { - /* TODO move the blockable decision up to the callback */ - if (!blockable) - return -EAGAIN; - next = interval_tree_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem, start, last, cookie) || ret_val; - } - - return ret_val; -} diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 1e5aeb39f774..63f7f7db5902 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device dev; /* First group for device attributes, NULL terminated array */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 14a80fd9f464..06ed32c8662f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -252,6 +252,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ucontext->closing = false; ucontext->cleanup_retryable = false; + xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 61758201d9b2..269938f59d3f 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -795,6 +795,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + if (IS_ERR(attr)) + return PTR_ERR(attr); + if (size < attr->ptr_attr.len) { if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, attr->ptr_attr.len - size)) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c4c652e87044..970d8e31dd65 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -772,6 +772,8 @@ out_unlock: return (ret) ? : count; } +static const struct vm_operations_struct rdma_umap_ops; + static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; @@ -785,7 +787,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) ret = PTR_ERR(ucontext); goto out; } - + vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); @@ -793,38 +795,6 @@ out: } /* - * Each time we map IO memory into user space this keeps track of the mapping. - * When the device is hot-unplugged we 'zap' the mmaps in user space to point - * to the zero page and allow the hot unplug to proceed. - * - * This is necessary for cases like PCI physical hot unplug as the actual BAR - * memory may vanish after this and access to it from userspace could MCE. - * - * RDMA drivers supporting disassociation must have their user space designed - * to cope in some way with their IO pages going to the zero page. - */ -struct rdma_umap_priv { - struct vm_area_struct *vma; - struct list_head list; -}; - -static const struct vm_operations_struct rdma_umap_ops; - -static void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma) -{ - struct ib_uverbs_file *ufile = vma->vm_file->private_data; - - priv->vma = vma; - vma->vm_private_data = priv; - vma->vm_ops = &rdma_umap_ops; - - mutex_lock(&ufile->umap_lock); - list_add(&priv->list, &ufile->umaps); - mutex_unlock(&ufile->umap_lock); -} - -/* * The VMA has been dup'd, initialize the vm_private_data with a new tracking * struct */ @@ -849,7 +819,7 @@ static void rdma_umap_open(struct vm_area_struct *vma) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, opriv->entry); up_read(&ufile->hw_destroy_rwsem); return; @@ -880,6 +850,9 @@ static void rdma_umap_close(struct vm_area_struct *vma) * this point. */ mutex_lock(&ufile->umap_lock); + if (priv->entry) + rdma_user_mmap_entry_put(priv->entry); + list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); @@ -931,44 +904,6 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, }; -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. - */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct ib_uverbs_file *ufile = ucontext->ufile; - struct rdma_umap_priv *priv; - - if (!(vma->vm_flags & VM_SHARED)) - return -EINVAL; - - if (vma->vm_end - vma->vm_start != size) - return -EINVAL; - - /* Driver is using this wrong, must be called by ib_uverbs_mmap */ - if (WARN_ON(!vma->vm_file || - vma->vm_file->private_data != ufile)) - return -EINVAL; - lockdep_assert_held(&ufile->device->disassociate_srcu); - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - vma->vm_page_prot = prot; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_io); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; @@ -1018,6 +953,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } } mutex_unlock(&ufile->umap_lock); skip_mm: diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..dd765e176cdd 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,6 +244,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. @@ -662,16 +664,17 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * @@ -2458,6 +2461,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 433fca59febd..0aeccd984889 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index ab8779d23382..b83f1cc38c52 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e55a1666c0cd..725b2350e349 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries { #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 +#define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_dev { struct ib_device ibdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4149dc9e824..9b6ca15a183c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, return 0; } -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - switch (device_modify_mask) { - case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: - /* Modify the GUID requires the modification of the GID table */ - /* GUID should be made as READ-ONLY */ - break; - case IB_DEVICE_MODIFY_NODE_DESC: - /* Node Desc should be made as READ-ONLY */ - break; - default: - break; - } - return 0; -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) @@ -855,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes += (qplib_qp->sq.max_wqe * psn_sz); } bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -869,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qprva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; @@ -1322,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2565,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); goto fail; @@ -3530,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31662b1ee35a..23d972da5652 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -145,9 +145,6 @@ struct bnxt_re_ucontext { int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 30a54f8aa42c..e7e8a0f49464 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) * reserved for the function. The driver may choose to allocate fewer * resources than the firmware maximum. */ -static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { - u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0; - u32 i; - u32 vf_pct; - u32 num_vfs; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *attr; + struct bnxt_qplib_ctx *ctx; + int i; - rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, - dev_attr->max_qp); + attr = &rdev->dev_attr; + ctx = &rdev->qplib_ctx; - rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, + attr->max_qp); + ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; /* Use max_mr from fw since max_mrw does not get set */ - rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count, - dev_attr->max_mr); - rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, - dev_attr->max_srq); - rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, - dev_attr->max_cq); - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; - - if (rdev->num_vfs) { - /* - * Reserve a set of resources for the PF. Divide the remaining - * resources among the VFs - */ - vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; - num_vfs = 100 * rdev->num_vfs; - vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs; - vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs; - vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs; - /* - * The driver allows many more MRs than other resources. If the - * firmware does also, then reserve a fixed amount for the PF - * and divide the rest among VFs. VFs may use many MRs for NFS - * mounts, ISER, NVME applications, etc. If the firmware - * severely restricts the number of MRs, then let PF have - * half and divide the rest among VFs, as for the other - * resource types. - */ - if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) - vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs; - else - vf_mrws = (rdev->qplib_ctx.mrw_count - - BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs; - vf_gids = BNXT_RE_MAX_GID_PER_VF; + ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); + ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, + attr->max_srq); + ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_count[i] = + rdev->dev_attr.tqm_alloc_reqs[i]; +} + +static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +{ + struct bnxt_qplib_vf_res *vf_res; + u32 mrws = 0; + u32 vf_pct; + u32 nvfs; + + vf_res = &qplib_ctx->vf_res; + /* + * Reserve a set of resources for the PF. Divide the remaining + * resources among the VFs + */ + vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; + nvfs = num_vf; + num_vf = 100 * num_vf; + vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; + vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; + vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; + /* + * The driver allows many more MRs than other resources. If the + * firmware does also, then reserve a fixed amount for the PF and + * divide the rest among VFs. VFs may use many MRs for NFS + * mounts, ISER, NVME applications, etc. If the firmware severely + * restricts the number of MRs, then let PF have half and divide + * the rest among VFs, as for the other resource types. + */ + if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { + mrws = qplib_ctx->mrw_count * vf_pct; + nvfs = num_vf; + } else { + mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; } - rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws; - rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids; - rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps; - rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs; - rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs; + vf_res->max_mrw_per_vf = (mrws / nvfs); + vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; +} + +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +{ + u32 num_vfs; + + memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); + bnxt_re_limit_pf_res(rdev); + + num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; + if (num_vfs) + bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); } /* for handling bnxt_en callbacks later */ @@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); + } } static void bnxt_re_shutdown(void *p) @@ -477,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); + req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -625,7 +643,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, .modify_ah = bnxt_re_modify_ah, - .modify_device = bnxt_re_modify_device, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, @@ -895,10 +912,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 +#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? - 0x10000 : rdev->msix_entries[indx].db_offset; + (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : + BNXT_RE_GEN_P5_PF_NQ_DB) : + rdev->msix_entries[indx].db_offset; } static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) @@ -1270,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) return; } rdev->qplib_ctx.hwrm_intf_ver = - (u64)resp.hwrm_intf_major << 48 | - (u64)resp.hwrm_intf_minor << 32 | - (u64)resp.hwrm_intf_build << 16 | - resp.hwrm_intf_patch; + (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | + (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | + (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | + le16_to_cpu(resp.hwrm_intf_patch); } static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) @@ -1408,8 +1429,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rdev->is_virtfn); if (rc) goto disable_rcfw; - if (!rdev->is_virtfn) - bnxt_re_set_resource_limits(rdev); + + bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 60c8f76aab33..5cdfa84faf85 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, * shall setup this area for VF. Skipping the * HW programming */ - if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (is_virtfn) goto skip_ctx_setup; + if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + goto config_vf_res; level = ctx->qpc_tbl.level; req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | @@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); +config_vf_res: req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index fbda11a7ab1a..aaa76d792185 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx { u8 chip_metal; }; -#define CHIP_NUM_57500 0x1750 +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 struct bnxt_qplib_res { struct pci_dev *pdev; @@ -203,7 +205,9 @@ struct bnxt_qplib_res { static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { - return (cctx->chip_num == CHIP_NUM_57500); + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); } static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig deleted file mode 100644 index 8c1a72bff447..000000000000 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_CXGB3 - tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 - select GENERIC_ALLOCATOR - ---help--- - This is an iWARP/RDMA driver for the Chelsio T3 1GbE and - 10GbE adapters. - - For general information about Chelsio and our products, visit - our website at <http://www.chelsio.com>. - - For customer support, please visit our customer support page at - <http://www.chelsio.com/support.html>. - - Please send feedback to <linux-bugs@chelsio.com>. - - To compile this driver as a module, choose M here: the module - will be called iw_cxgb3. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile deleted file mode 100644 index 34bb86a6ae3a..000000000000 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 - -obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o - -iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ - iwch_provider.o iwch.o cxio_hal.o cxio_resource.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c deleted file mode 100644 index 95b22a651673..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ /dev/null @@ -1,1312 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <asm/delay.h> - -#include <linux/mutex.h> -#include <linux/netdevice.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/slab.h> -#include <net/net_namespace.h> - -#include "cxio_resource.h" -#include "cxio_hal.h" -#include "cxgb3_offload.h" -#include "sge_defs.h" - -static LIST_HEAD(rdev_list); -static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; - -static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (!strcmp(rdev->dev_name, dev_name)) - return rdev; - return NULL; -} - -static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (rdev->t3cdev_p == tdev) - return rdev; - return NULL; -} - -int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit) -{ - int ret; - struct t3_cqe *cqe; - u32 rptr; - - struct rdma_cq_op setup; - setup.id = cq->cqid; - setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; - setup.op = op; - ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); - - if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) - return ret; - - /* - * If the rearm returned an index other than our current index, - * then there might be CQE's in flight (being DMA'd). We must wait - * here for them to complete or the consumer can miss a notification. - */ - if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { - int i=0; - - rptr = cq->rptr; - - /* - * Keep the generation correct by bumping rptr until it - * matches the index returned by the rearm - 1. - */ - while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) - rptr++; - - /* - * Now rptr is the index for the (last) cqe that was - * in-flight at the time the HW rearmed the CQ. We - * spin until that CQE is valid. - */ - cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); - while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { - udelay(1); - if (i++ > 1000000) { - pr_err("%s: stalled rnic\n", rdev_p->dev_name); - return -EIO; - } - } - - return 1; - } - - return 0; -} - -static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) -{ - struct rdma_cq_setup setup; - setup.id = cqid; - setup.base_addr = 0; /* NULL address */ - setup.size = 0; /* disaable the CQ */ - setup.credits = 0; - setup.credit_thres = 0; - setup.ovfl_mode = 0; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) -{ - u64 sge_cmd; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, - T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = qpid << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) -{ - struct rdma_cq_setup setup; - int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); - - size += 1; /* one extra page for storing cq-in-err state */ - cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); - if (!cq->cqid) - return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, - &(cq->dma_addr), GFP_KERNEL); - if (!cq->queue) { - kfree(cq->sw_queue); - return -ENOMEM; - } - dma_unmap_addr_set(cq, mapping, cq->dma_addr); - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = 65535; - setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type != T3A) - setup.ovfl_mode = 0; - else - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - u32 qpid; - int i; - - mutex_lock(&uctx->lock); - if (!list_empty(&uctx->qpids)) { - entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, - entry); - list_del(&entry->entry); - qpid = entry->qpid; - kfree(entry); - } else { - qpid = cxio_hal_get_qpid(rdev_p->rscp); - if (!qpid) - goto out; - for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - entry->qpid = i; - list_add_tail(&entry->entry, &uctx->qpids); - } - } -out: - mutex_unlock(&uctx->lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, - struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return; - pr_debug("%s qpid 0x%x\n", __func__, qpid); - entry->qpid = qpid; - mutex_lock(&uctx->lock); - list_add_tail(&entry->entry, &uctx->qpids); - mutex_unlock(&uctx->lock); -} - -void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct list_head *pos, *nxt; - struct cxio_qpid_list *entry; - - mutex_lock(&uctx->lock); - list_for_each_safe(pos, nxt, &uctx->qpids) { - entry = list_entry(pos, struct cxio_qpid_list, entry); - list_del_init(&entry->entry); - if (!(entry->qpid & rdev_p->qpmask)) - cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); - kfree(entry); - } - mutex_unlock(&uctx->lock); -} - -void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - INIT_LIST_HEAD(&uctx->qpids); - mutex_init(&uctx->lock); -} - -int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, - struct t3_wq *wq, struct cxio_ucontext *uctx) -{ - int depth = 1UL << wq->size_log2; - int rqsize = 1UL << wq->rq_size_log2; - - wq->qpid = get_qpid(rdev_p, uctx); - if (!wq->qpid) - return -ENOMEM; - - wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL); - if (!wq->rq) - goto err1; - - wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); - if (!wq->rq_addr) - goto err2; - - wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL); - if (!wq->sq) - goto err3; - - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - depth * sizeof(union t3_wr), - &(wq->dma_addr), GFP_KERNEL); - if (!wq->queue) - goto err4; - - dma_unmap_addr_set(wq, mapping, wq->dma_addr); - wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - if (!kernel_domain) - wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + - (wq->qpid << rdev_p->qpshift); - wq->rdev = rdev_p; - pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", - __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb); - return 0; -err4: - kfree(wq->sq); -err3: - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); -err2: - kfree(wq->rq); -err1: - put_qpid(rdev_p, wq->qpid, uctx); - return -ENOMEM; -} - -void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); - kfree(cq->sw_queue); - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe) + 1, cq->queue, - dma_unmap_addr(cq, mapping)); - cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); -} - -int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, - struct cxio_ucontext *uctx) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (wq->size_log2)) - * sizeof(union t3_wr), wq->queue, - dma_unmap_addr(wq, mapping)); - kfree(wq->sq); - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); - kfree(wq->rq); - put_qpid(rdev_p, wq->qpid, uctx); - return 0; -} - -static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(T3_SEND) | - V_CQE_TYPE(0) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - u32 ptr; - int flushed = 0; - - pr_debug("%s wq %p cq %p\n", __func__, wq, cq); - - /* flush RQ */ - pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, - wq->rq_rptr, wq->rq_wptr, count); - ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) { - insert_recv_cqe(wq, cq); - flushed++; - } - return flushed; -} - -static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, - struct t3_swsq *sqp) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(sqp->opcode) | - V_CQE_TYPE(1) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - cqe.u.scqe.wrid_hi = sqp->sq_wptr; - - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - __u32 ptr = wq->sq_rptr + count; - int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - - while (ptr != wq->sq_wptr) { - sqp->signaled = 0; - insert_sq_cqe(wq, cq, sqp); - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - flushed++; - } - return flushed; -} - -/* - * Move all CQEs from the HWCQ into the SWCQ. - */ -void cxio_flush_hw_cq(struct t3_cq *cq) -{ - struct t3_cqe *cqe, *swcqe; - - pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - cqe = cxio_next_hw_cqe(cq); - while (cqe) { - pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __func__, cq->rptr, cq->sw_wptr); - swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - cq->sw_wptr++; - cq->rptr++; - cqe = cxio_next_hw_cqe(cq); - } -} - -static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) -{ - if (CQE_OPCODE(*cqe) == T3_TERMINATE) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) - return 0; - - if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) - return 0; - - return 1; -} - -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || - ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && - (CQE_QPID(*cqe) == wq->qpid)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && - (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) -{ - struct rdma_cq_setup setup; - setup.id = 0; - setup.base_addr = 0; /* NULL address */ - setup.size = 1; /* enable the CQ */ - setup.credits = 0; - - /* force SGE to redirect to RspQ and interrupt */ - setup.credit_thres = 0; - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) -{ - int err; - u64 sge_cmd, ctx0, ctx1; - u64 base_addr; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - err = cxio_hal_init_ctrl_cq(rdev_p); - if (err) { - pr_debug("%s err %d initializing ctrl_cq\n", __func__, err); - goto err; - } - rdev_p->ctrl_qp.workq = dma_alloc_coherent( - &(rdev_p->rnic_info.pdev->dev), - (1 << T3_CTRL_QP_SIZE_LOG2) * - sizeof(union t3_wr), - &(rdev_p->ctrl_qp.dma_addr), - GFP_KERNEL); - if (!rdev_p->ctrl_qp.workq) { - pr_debug("%s dma_alloc_coherent failed\n", __func__); - err = -ENOMEM; - goto err; - } - dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, - rdev_p->ctrl_qp.dma_addr); - rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - - mutex_init(&rdev_p->ctrl_qp.lock); - init_waitqueue_head(&rdev_p->ctrl_qp.waitq); - - /* update HW Ctrl QP context */ - base_addr = rdev_p->ctrl_qp.dma_addr; - base_addr >>= 12; - ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | - V_EC_BASE_LO((u32) base_addr & 0xffff)); - ctx0 <<= 32; - ctx0 |= V_EC_CREDITS(FW_WR_NUM); - base_addr >>= 16; - ctx1 = (u32) base_addr; - base_addr >>= 32; - ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | - V_EC_TYPE(0) | V_EC_GEN(1) | - V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7, T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - wqe->ctx1 = cpu_to_be64(ctx1); - wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", - &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, - 1 << T3_CTRL_QP_SIZE_LOG2); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -err: - kfree_skb(skb); - return err; -} - -static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << T3_CTRL_QP_SIZE_LOG2) - * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); - return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); -} - -/* write len bytes of data into addr (32B aligned address) - * If data is NULL, clear len byte of memory to zero. - * caller acquires the ctrl_qp lock before the call - */ -static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data) -{ - u32 i, nr_wqe, copy_len; - u8 *copy_data; - u8 wr_len, utx_len; /* length in 8 byte flit */ - enum t3_wr_flags flag; - __be64 *wqe; - u64 utx_cmd; - addr &= 0x7FFFFFF; - nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ - pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, - nr_wqe, data, addr); - utx_len = 3; /* in 32B unit */ - for (i = 0; i < nr_wqe; i++) { - if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2)) { - pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n", - __func__, - rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - !Q_FULL(rdev_p->ctrl_qp.rptr, - rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2))) { - pr_debug("%s ctrl_qp workq interrupted\n", - __func__); - return -ERESTARTSYS; - } - pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n", - __func__, i); - } - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - flag = 0; - if (i == (nr_wqe - 1)) { - /* last WQE */ - flag = T3_COMPLETION_FLAG; - if (len % 32) - utx_len = len / 32 + 1; - else - utx_len = len / 32; - } - - /* - * Force a CQE to return the credit to the workq in case - * we posted more than half the max QP size of WRs - */ - if ((i != 0) && - (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { - flag = T3_COMPLETION_FLAG; - pr_debug("%s force completion at i %d\n", __func__, i); - } - - /* build the utx mem command */ - wqe += (sizeof(struct t3_bypass_wr) >> 3); - utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); - utx_cmd <<= 32; - utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); - *wqe = cpu_to_be64(utx_cmd); - wqe++; - copy_data = (u8 *) data + i * 96; - copy_len = len > 96 ? 96 : len; - - /* clear memory content if data is NULL */ - if (data) - memcpy(wqe, copy_data, copy_len); - else - memset(wqe, 0, copy_len); - if (copy_len % 32) - memset(((u8 *) wqe) + copy_len, 0, - 32 - (copy_len % 32)); - wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + - (utx_len << 2); - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - - /* wptr in the WRID[31:0] */ - ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; - - /* - * This must be the last write with a memory barrier - * for the genbit - */ - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, - Q_GENBIT(rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len, T3_SOPEOP); - if (flag == T3_COMPLETION_FLAG) - ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); - len -= 96; - rdev_p->ctrl_qp.wptr++; - } - return 0; -} - -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr - * OUT: stag index - * TBD: shared memory region support - */ -static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, - u32 *stag, u8 stag_state, u32 pdid, - enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, - u32 pbl_size, u32 pbl_addr) -{ - int err; - struct tpt_entry tpt; - u32 stag_idx; - u32 wptr; - - if (cxio_fatal_error(rdev_p)) - return -EIO; - - stag_state = stag_state > 0; - stag_idx = (*stag) >> 8; - - if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { - stag_idx = cxio_hal_get_stag(rdev_p->rscp); - if (!stag_idx) - return -ENOMEM; - *stag = (stag_idx << 8) | ((*stag) & 0xFF); - } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); - - mutex_lock(&rdev_p->ctrl_qp.lock); - - /* write TPT entry */ - if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); - else { - tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID | - V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | - V_TPT_STAG_STATE(stag_state) | - V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); - BUG_ON(page_size >= 28); - tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); - tpt.len = cpu_to_be32(len); - tpt.va_hi = cpu_to_be32((u32) (to >> 32)); - tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); - tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); - } - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - stag_idx + - (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt); - - /* release the stag index to free pool */ - if (reset_tpt_entry) - cxio_hal_put_stag(rdev_p->rscp, stag_idx); - - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (!err) - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - return err; -} - -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) -{ - u32 wptr; - int err; - - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev_p->rnic_info.pbl_base, - pbl_size); - - mutex_lock(&rdev_p->ctrl_qp.lock); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, - pbl); - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (err) - return err; - - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - - return 0; -} - -int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, - u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - pbl_size, pbl_addr); -} - -int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); -} - -int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) -{ - struct t3_rdma_init_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - pr_debug("%s rdev_p %p\n", __func__, rdev_p); - wqe = __skb_put(skb, sizeof(*wqe)); - wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); - wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | - V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); - wqe->wrid.id1 = 0; - wqe->qpid = cpu_to_be32(attr->qpid); - wqe->pdid = cpu_to_be32(attr->pdid); - wqe->scqid = cpu_to_be32(attr->scqid); - wqe->rcqid = cpu_to_be32(attr->rcqid); - wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); - wqe->rq_size = cpu_to_be32(attr->rq_size); - wqe->mpaattrs = attr->mpaattrs; - wqe->qpcaps = attr->qpcaps; - wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->rqe_count = cpu_to_be16(attr->rqe_count); - wqe->flags_rtr_type = cpu_to_be16(attr->flags | - V_RTR_TYPE(attr->rtr_type) | - V_CHAN(attr->chan)); - wqe->ord = cpu_to_be32(attr->ord); - wqe->ird = cpu_to_be32(attr->ird); - wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); - wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->irs = cpu_to_be32(attr->irs); - skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = ev_cb; -} - -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = NULL; -} - -static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) -{ - static int cnt; - struct cxio_rdev *rdev_p = NULL; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), - RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), - RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), - RSPQ_CREDIT_THRESH(rsp_msg)); - pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; - if (!rdev_p) { - pr_debug("%s called by t3cdev %p with null ulp\n", __func__, - t3cdev_p); - return 0; - } - if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { - rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; - wake_up_interruptible(&rdev_p->ctrl_qp.waitq); - dev_kfree_skb_irq(skb); - } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) - dev_kfree_skb_irq(skb); - else if (cxio_ev_cb) - (*cxio_ev_cb) (rdev_p, skb); - else - dev_kfree_skb_irq(skb); - cnt++; - return 0; -} - -/* Caller takes care of locking if needed */ -int cxio_rdev_open(struct cxio_rdev *rdev_p) -{ - struct net_device *netdev_p = NULL; - int err = 0; - if (strlen(rdev_p->dev_name)) { - if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { - return -EBUSY; - } - netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); - if (!netdev_p) { - return -EINVAL; - } - dev_put(netdev_p); - } else if (rdev_p->t3cdev_p) { - if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { - return -EBUSY; - } - netdev_p = rdev_p->t3cdev_p->lldev; - strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, - T3_MAX_DEV_NAME_LEN); - } else { - pr_debug("%s t3cdev_p or dev_name must be set\n", __func__); - return -EINVAL; - } - - list_add_tail(&rdev_p->entry, &rdev_list); - - pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); - memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); - if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = dev2t3cdev(netdev_p); - rdev_p->t3cdev_p->ulp = (void *) rdev_p; - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, - &(rdev_p->fw_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { - pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n", - CXIO_FW_MAJ, - G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); - err = -EINVAL; - goto err1; - } - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, - &(rdev_p->rnic_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, - &(rdev_p->port_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that qp. - */ - cxio_init_ucontext(rdev_p, &rdev_p->uctx); - rdev_p->qpshift = PAGE_SHIFT - - ilog2(65536 >> - ilog2(rdev_p->rnic_info.udbell_len >> - PAGE_SHIFT)); - rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; - rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; - pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, - rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), - rdev_p->rnic_info.pbl_base, - rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, - rdev_p->rnic_info.rqt_top); - pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n", - rdev_p->rnic_info.udbell_len, - rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, - rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); - - err = cxio_hal_init_ctrl_qp(rdev_p); - if (err) { - pr_err("%s error %d initializing ctrl_qp\n", __func__, err); - goto err1; - } - err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, - 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, - T3_MAX_NUM_PD); - if (err) { - pr_err("%s error %d initializing hal resources\n", - __func__, err); - goto err2; - } - err = cxio_hal_pblpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing pbl mem pool\n", - __func__, err); - goto err3; - } - err = cxio_hal_rqtpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing rqt mem pool\n", - __func__, err); - goto err4; - } - return 0; -err4: - cxio_hal_pblpool_destroy(rdev_p); -err3: - cxio_hal_destroy_resource(rdev_p->rscp); -err2: - cxio_hal_destroy_ctrl_qp(rdev_p); -err1: - rdev_p->t3cdev_p->ulp = NULL; - list_del(&rdev_p->entry); - return err; -} - -void cxio_rdev_close(struct cxio_rdev *rdev_p) -{ - if (rdev_p) { - cxio_hal_pblpool_destroy(rdev_p); - cxio_hal_rqtpool_destroy(rdev_p); - list_del(&rdev_p->entry); - cxio_hal_destroy_ctrl_qp(rdev_p); - cxio_hal_destroy_resource(rdev_p->rscp); - rdev_p->t3cdev_p->ulp = NULL; - } -} - -int __init cxio_hal_init(void) -{ - if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) - return -ENOMEM; - t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); - return 0; -} - -void __exit cxio_hal_exit(void) -{ - struct cxio_rdev *rdev, *tmp; - - t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); - list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) - cxio_rdev_close(rdev); - cxio_hal_destroy_rhdl_resource(); -} - -static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_swsq *sqp; - __u32 ptr = wq->sq_rptr; - int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); - - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - while (count--) - if (!sqp->signaled) { - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - } else if (sqp->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), - Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); - sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) - = sqp->cqe; - cq->sw_wptr++; - sqp->signaled = 0; - break; - } else - break; -} - -static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) -{ - read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; - read_cqe->len = wq->oldest_read->read_len; - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | - V_CQE_SWCQE(SW_CQE(*hw_cqe)) | - V_CQE_OPCODE(T3_READ_REQ) | - V_CQE_TYPE(1)); -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t3_wq *wq) -{ - - u32 rptr = wq->oldest_read - wq->sq + 1; - u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); - - while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { - wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); - - if (wq->oldest_read->opcode == T3_READ_REQ) - return; - rptr++; - } - wq->oldest_read = NULL; -} - -/* - * cxio_poll_cq - * - * Caller must: - * check the validity of the first CQE, - * supply the wq assicated with the qpid. - * - * credit: cq credit to return to sge. - * cqe_flushed: 1 iff the CQE is flushed. - * cqe: copy of the polled CQE. - * - * return value: - * 0 CQE returned, - * -1 CQE skipped, try again. - */ -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) -{ - int ret = 0; - struct t3_cqe *hw_cqe, read_cqe; - - *cqe_flushed = 0; - *credit = 0; - hw_cqe = cxio_next_cqe(cq); - - pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), - CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), - CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), - CQE_WRID_LOW(*hw_cqe)); - - /* - * skip cqe's not affiliated with a QP. - */ - if (wq == NULL) { - ret = -1; - goto skip_cqe; - } - - /* - * Gotta tweak READ completions: - * 1) the cqe doesn't contain the sq_wptr from the wr. - * 2) opcode not reflected from the wr. - * 3) read_len not reflected from the wr. - * 4) cq_type is RQ_TYPE not SQ_TYPE. - */ - if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { - - /* - * If this is an unsolicited read response, then the read - * was generated by the kernel driver as part of peer-2-peer - * connection setup. So ignore the completion. - */ - if (!wq->oldest_read) { - if (CQE_STATUS(*hw_cqe)) - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - /* - * Don't write to the HWCQ, so create a new read req CQE - * in local memory. - */ - create_read_req_cqe(wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(wq); - } - - /* - * T3A: Discard TERMINATE CQEs. - */ - if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { - ret = -1; - wq->error = 1; - goto skip_cqe; - } - - if (CQE_STATUS(*hw_cqe) || wq->error) { - *cqe_flushed = wq->error; - wq->error = 1; - - /* - * T3A inserts errors into the CQE. We cannot return - * these as work completions. - */ - /* incoming write failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) - && RQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - /* incoming read request failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - - /* incoming SEND with no receive posted failures */ - if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - ret = -1; - goto skip_cqe; - } - BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); - goto proc_cqe; - } - - /* - * RECV completion. - */ - if (RQ_TYPE(*hw_cqe)) { - - /* - * HW only validates 4 bits of MSN. So we must validate that - * the MSN in the SEND is the next expected MSN. If its not, - * then we complete this with TPT_ERR_MSN and mark the wq in - * error. - */ - - if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { - wq->error = 1; - hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); - goto proc_cqe; - } - goto proc_cqe; - } - - /* - * If we get here its a send completion. - * - * Handle out of order completion. These get stuffed - * in the SW SQ. Then the SW SQ is walked to move any - * now in-order completions into the SW CQ. This handles - * 2 cases: - * 1) reaping unsignaled WRs when the first subsequent - * signaled WR is completed. - * 2) out of order read completions. - */ - if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { - struct t3_swsq *sqp; - - pr_debug("%s out of order completion going in swsq at idx %ld\n", - __func__, - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), - wq->sq_size_log2)); - sqp = wq->sq + - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); - sqp->cqe = *hw_cqe; - sqp->complete = 1; - ret = -1; - goto flush_wq; - } - -proc_cqe: - *cqe = *hw_cqe; - - /* - * Reap the associated WR(s) that are freed up with this - * completion. - */ - if (SQ_TYPE(*hw_cqe)) { - wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - pr_debug("%s completing sq idx %ld\n", __func__, - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; - wq->sq_rptr++; - } else { - pr_debug("%s completing rq idx %ld\n", __func__, - Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; - if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) - cxio_hal_pblpool_free(wq->rdev, - wq->rq[Q_PTR2IDX(wq->rq_rptr, - wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); - BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); - wq->rq_rptr++; - } - -flush_wq: - /* - * Flush any completed cqes that are now in-order. - */ - flush_completed_wrs(wq, cq); - -skip_cqe: - if (SW_CQE(*hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __func__, cq, cq->cqid, cq->sw_rptr); - ++cq->sw_rptr; - } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __func__, cq, cq->cqid, cq->rptr); - ++cq->rptr; - - /* - * T3A: compute credits. - */ - if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) - || ((cq->rptr - cq->wptr) >= 128)) { - *credit = cq->rptr - cq->wptr; - cq->wptr = cq->rptr; - } - } - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h deleted file mode 100644 index 40c029ffa425..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_HAL_H__ -#define __CXIO_HAL_H__ - -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/kfifo.h> - -#include "t3_cpl.h" -#include "t3cdev.h" -#include "cxgb3_ctl_defs.h" -#include "cxio_wr.h" - -#define T3_CTRL_QP_ID FW_RI_SGEEC_START -#define T3_CTL_QP_TID FW_RI_TID_START -#define T3_CTRL_QP_SIZE_LOG2 8 -#define T3_CTRL_CQ_ID 0 - -#define T3_MAX_NUM_RI (1<<15) -#define T3_MAX_NUM_QP (1<<15) -#define T3_MAX_NUM_CQ (1<<15) -#define T3_MAX_NUM_PD (1<<15) -#define T3_MAX_PBL_SIZE 256 -#define T3_MAX_RQ_SIZE 1024 -#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 65536 -#define T3_MAX_NUM_STAG (1<<15) -#define T3_MAX_MR_SIZE 0x100000000ULL -#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ - -#define T3_STAG_UNSET 0xffffffff - -#define T3_MAX_DEV_NAME_LEN 32 - -#define CXIO_FW_MAJ 7 - -struct cxio_hal_ctrl_qp { - u32 wptr; - u32 rptr; - struct mutex lock; /* for the wtpr, can sleep */ - wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ - union t3_wr *workq; /* the work request queue */ - dma_addr_t dma_addr; /* pci bus address of the workq */ - DEFINE_DMA_UNMAP_ADDR(mapping); - void __iomem *doorbell; -}; - -struct cxio_hal_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qpid_fifo; - spinlock_t qpid_fifo_lock; - struct kfifo cqid_fifo; - spinlock_t cqid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; -}; - -struct cxio_qpid_list { - struct list_head entry; - u32 qpid; -}; - -struct cxio_ucontext { - struct list_head qpids; - struct mutex lock; -}; - -struct cxio_rdev { - char dev_name[T3_MAX_DEV_NAME_LEN]; - struct t3cdev *t3cdev_p; - struct rdma_info rnic_info; - struct adap_ports port_info; - struct cxio_hal_resource *rscp; - struct cxio_hal_ctrl_qp ctrl_qp; - void *ulp; - unsigned long qpshift; - u32 qpnr; - u32 qpmask; - struct cxio_ucontext uctx; - struct gen_pool *pbl_pool; - struct gen_pool *rqt_pool; - struct list_head entry; - struct ch_embedded_info fw_info; - u32 flags; -#define CXIO_ERROR_FATAL 1 -}; - -static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) -{ - return rdev_p->flags & CXIO_ERROR_FATAL; -} - -static inline int cxio_num_stags(struct cxio_rdev *rdev_p) -{ - return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); -} - -typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, - struct sk_buff * skb); - -#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) -#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) -#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) -#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) -#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) -#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) -#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) -#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) -#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) - -struct respQ_msg_t { - __be32 flags; /* flit 0 */ - __be32 cq_ptrid; - __be64 rsvd; /* flit 1 */ - struct t3_cqe cqe; /* flits 2-3 */ -}; - -enum t3_cq_opcode { - CQ_ARM_AN = 0x2, - CQ_ARM_SE = 0x6, - CQ_FORCE_AN = 0x3, - CQ_CREDIT_UPDATE = 0x7 -}; - -int cxio_rdev_open(struct cxio_rdev *rdev); -void cxio_rdev_close(struct cxio_rdev *rdev); -int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size); -int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr); -int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); -int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); -int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); -int __init cxio_hal_init(void); -void __exit cxio_hal_exit(void); -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_flush_hw_cq(struct t3_cq *cq); -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit); -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c deleted file mode 100644 index c6e7bc4420b6..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -/* Crude resource management */ -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include "cxio_resource.h" -#include "cxio_hal.h" - -static struct kfifo rhdl_fifo; -static spinlock_t rhdl_fifo_lock; - -#define RANDOM_SIZE 16 - -static int __cxio_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = prandom_u32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = prandom_u32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock) != sizeof(u32)) - break; - return 0; -} - -static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0)); -} - -static int cxio_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1)); -} - -static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) -{ - u32 i; - - spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - - if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL)) - return -ENOMEM; - - for (i = 16; i < T3_MAX_NUM_QP; i++) - if (!(i & rdev_p->qpmask)) - kfifo_in(&rdev_p->rscp->qpid_fifo, - (unsigned char *) &i, sizeof(u32)); - return 0; -} - -int cxio_hal_init_rhdl_resource(u32 nr_rhdl) -{ - return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, - 0); -} - -void cxio_hal_destroy_rhdl_resource(void) -{ - kfifo_free(&rhdl_fifo); -} - -/* nr_* must be power of 2 */ -int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) -{ - int err = 0; - struct cxio_hal_resource *rscp; - - rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); - if (!rscp) - return -ENOMEM; - rdev_p->rscp = rscp; - err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, - &rscp->tpt_fifo_lock, - nr_tpt, 1, 0); - if (err) - goto tpt_err; - err = cxio_init_qpid_fifo(rdev_p); - if (err) - goto qpid_err; - err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, - nr_cqid, 1, 0); - if (err) - goto cqid_err; - err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, - nr_pdid, 1, 0); - if (err) - goto pdid_err; - return 0; -pdid_err: - kfifo_free(&rscp->cqid_fifo); -cqid_err: - kfifo_free(&rscp->qpid_fifo); -qpid_err: - kfifo_free(&rscp->tpt_fifo); -tpt_err: - return -ENOMEM; -} - -/* - * returns 0 if no resource available - */ -static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) -{ - u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else - return 0; /* fifo emptry */ -} - -static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, - u32 entry) -{ - BUG_ON( - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) - == 0); -} - -u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); -} - -void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) -{ - cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); -} - -u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) -{ - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, - &rscp->qpid_fifo_lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) -{ - pr_debug("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); -} - -u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); -} - -void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) -{ - cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); -} - -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); -} - -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) -{ - cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); -} - -void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) -{ - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->cqid_fifo); - kfifo_free(&rscp->qpid_fifo); - kfifo_free(&rscp->pdid_fifo); - kfree(rscp); -} - -/* - * PBL Memory Manager. Uses Linux generic allocator. - */ - -#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ - -u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - return (u32)addr; -} - -void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); - gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); -} - -int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) -{ - unsigned pbl_start, pbl_chunk; - - rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (!rdev_p->pbl_pool) - return -ENOMEM; - - pbl_start = rdev_p->rnic_info.pbl_base; - pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; - - while (pbl_start < rdev_p->rnic_info.pbl_top) { - pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, - pbl_chunk); - if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { - pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n", - __func__, pbl_start, - rdev_p->rnic_info.pbl_top - pbl_start); - return 0; - } - pbl_chunk >>= 1; - } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - pbl_start += pbl_chunk; - } - } - - return 0; -} - -void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->pbl_pool); -} - -/* - * RQT Memory Manager. Uses Linux generic allocator. - */ - -#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ -#define RQT_CHUNK 2*1024*1024 - -u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); - return (u32)addr; -} - -void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); - gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); -} - -int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) -{ - unsigned long i; - rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); - if (rdev_p->rqt_pool) - for (i = rdev_p->rnic_info.rqt_base; - i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; - i += RQT_CHUNK) - gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); - return rdev_p->rqt_pool ? 0 : -ENOMEM; -} - -void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->rqt_pool); -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h deleted file mode 100644 index a2703a3d882d..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_RESOURCE_H__ -#define __CXIO_RESOURCE_H__ - -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include <linux/genalloc.h> -#include "cxio_hal.h" - -extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); -extern void cxio_hal_destroy_rhdl_resource(void); -extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, - u32 nr_pdid); -extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); -extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); -extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); -extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); - -#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) -extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); - -#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) -extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h deleted file mode 100644 index 53aa5c36247a..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_WR_H__ -#define __CXIO_WR_H__ - -#include <asm/io.h> -#include <linux/pci.h> -#include <linux/timer.h> -#include "firmware_exports.h" - -#define T3_MAX_SGE 4 -#define T3_MAX_INLINE 64 -#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) -#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) -#define T3_STAG0_PAGE_SHIFT 15 - -#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) -#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ - ((rptr)!=(wptr)) ) -#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) -#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr))) -#define Q_COUNT(rptr,wptr) ((wptr)-(rptr)) -#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1)) - -static inline void ring_doorbell(void __iomem *doorbell, u32 qpid) -{ - writel(((1<<31) | qpid), doorbell); -} - -#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 )) - -enum t3_wr_flags { - T3_COMPLETION_FLAG = 0x01, - T3_NOTIFY_FLAG = 0x02, - T3_SOLICITED_EVENT_FLAG = 0x04, - T3_READ_FENCE_FLAG = 0x08, - T3_LOCAL_FENCE_FLAG = 0x10 -} __packed; - -enum t3_wr_opcode { - T3_WR_BP = FW_WROPCODE_RI_BYPASS, - T3_WR_SEND = FW_WROPCODE_RI_SEND, - T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE, - T3_WR_READ = FW_WROPCODE_RI_RDMA_READ, - T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV, - T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, - T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, - T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, - T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR -} __packed; - -enum t3_rdma_opcode { - T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */ - T3_READ_REQ, - T3_READ_RESP, - T3_SEND, - T3_SEND_WITH_INV, - T3_SEND_WITH_SE, - T3_SEND_WITH_SE_INV, - T3_TERMINATE, - T3_RDMA_INIT, /* CHELSIO RI specific ... */ - T3_BIND_MW, - T3_FAST_REGISTER, - T3_LOCAL_INV, - T3_QP_MOD, - T3_BYPASS, - T3_RDMA_READ_REQ_WITH_INV, -} __packed; - -static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) -{ - switch (wrop) { - case T3_WR_BP: return T3_BYPASS; - case T3_WR_SEND: return T3_SEND; - case T3_WR_WRITE: return T3_RDMA_WRITE; - case T3_WR_READ: return T3_READ_REQ; - case T3_WR_INV_STAG: return T3_LOCAL_INV; - case T3_WR_BIND: return T3_BIND_MW; - case T3_WR_INIT: return T3_RDMA_INIT; - case T3_WR_QP_MOD: return T3_QP_MOD; - case T3_WR_FASTREG: return T3_FAST_REGISTER; - default: break; - } - return -1; -} - - -/* Work request id */ -union t3_wrid { - struct { - u32 hi; - u32 low; - } id0; - u64 id1; -}; - -#define WRID(wrid) (wrid.id1) -#define WRID_GEN(wrid) (wrid.id0.wr_gen) -#define WRID_IDX(wrid) (wrid.id0.wr_idx) -#define WRID_LO(wrid) (wrid.id0.wr_lo) - -struct fw_riwrh { - __be32 op_seop_flags; - __be32 gen_tid_len; -}; - -#define S_FW_RIWR_OP 24 -#define M_FW_RIWR_OP 0xff -#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP) -#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP) - -#define S_FW_RIWR_SOPEOP 22 -#define M_FW_RIWR_SOPEOP 0x3 -#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) - -#define S_FW_RIWR_FLAGS 8 -#define M_FW_RIWR_FLAGS 0x3fffff -#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) -#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) - -#define S_FW_RIWR_TID 8 -#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) - -#define S_FW_RIWR_LEN 0 -#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) - -#define S_FW_RIWR_GEN 31 -#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) - -struct t3_sge { - __be32 stag; - __be32 len; - __be64 to; -}; - -/* If num_sgle is zero, flit 5+ contains immediate data.*/ -struct t3_send_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 rem_stag; - __be32 plen; /* 3 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ -}; - -#define T3_MAX_FASTREG_DEPTH 10 -#define T3_MAX_FASTREG_FRAG 10 - -struct t3_fastreg_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 len; - __be32 va_base_hi; /* 3 */ - __be32 va_base_lo_fbo; - __be32 page_type_perms; /* 4 */ - __be32 reserved1; - __be64 pbl_addrs[0]; /* 5+ */ -}; - -/* - * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. - */ -struct t3_pbl_frag { - struct fw_riwrh wrh; /* 0 */ - __be64 pbl_addrs[14]; /* 1..14 */ -}; - -#define S_FR_PAGE_COUNT 24 -#define M_FR_PAGE_COUNT 0xff -#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) -#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) - -#define S_FR_PAGE_SIZE 16 -#define M_FR_PAGE_SIZE 0x1f -#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) -#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) - -#define S_FR_TYPE 8 -#define M_FR_TYPE 0x1 -#define V_FR_TYPE(x) ((x) << S_FR_TYPE) -#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) - -#define S_FR_PERMS 0 -#define M_FR_PERMS 0xff -#define V_FR_PERMS(x) ((x) << S_FR_PERMS) -#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) - -struct t3_local_inv_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 reserved; -}; - -struct t3_rdma_write_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 stag_sink; - __be64 to_sink; /* 3 */ - __be32 plen; /* 4 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ -}; - -struct t3_rdma_read_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 local_inv; - u8 reserved[2]; - __be32 rem_stag; - __be64 rem_to; /* 3 */ - __be32 local_stag; /* 4 */ - __be32 local_len; - __be64 local_to; /* 5 */ -}; - -struct t3_bind_mw_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u16 reserved; /* 2 */ - u8 type; - u8 perms; - __be32 mr_stag; - __be32 mw_stag; /* 3 */ - __be32 mw_len; - __be64 mw_va; /* 4 */ - __be32 mr_pbl_addr; /* 5 */ - u8 reserved2[3]; - u8 mr_pagesz; -}; - -struct t3_receive_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 pagesz[T3_MAX_SGE]; - __be32 num_sgle; /* 2 */ - struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ - __be32 pbl_addr[T3_MAX_SGE]; -}; - -struct t3_bypass_wr { - struct fw_riwrh wrh; - union t3_wrid wrid; /* 1 */ -}; - -struct t3_modify_qp_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 flags; /* 2 */ - __be32 quiesce; /* 2 */ - __be32 max_ird; /* 3 */ - __be32 max_ord; /* 3 */ - __be64 sge_cmd; /* 4 */ - __be64 ctx1; /* 5 */ - __be64 ctx0; /* 6 */ -}; - -enum t3_modify_qp_flags { - MODQP_QUIESCE = 0x01, - MODQP_MAX_IRD = 0x02, - MODQP_MAX_ORD = 0x04, - MODQP_WRITE_EC = 0x08, - MODQP_READ_EC = 0x10, -}; - - -enum t3_mpa_attrs { - uP_RI_MPA_RX_MARKER_ENABLE = 0x1, - uP_RI_MPA_TX_MARKER_ENABLE = 0x2, - uP_RI_MPA_CRC_ENABLE = 0x4, - uP_RI_MPA_IETF_ENABLE = 0x8 -} __packed; - -enum t3_qp_caps { - uP_RI_QP_RDMA_READ_ENABLE = 0x01, - uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, - uP_RI_QP_BIND_ENABLE = 0x04, - uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, - uP_RI_QP_STAG0_ENABLE = 0x10 -} __packed; - -enum rdma_init_rtr_types { - RTR_READ = 1, - RTR_WRITE = 2, - RTR_SEND = 3, -}; - -#define S_RTR_TYPE 2 -#define M_RTR_TYPE 0x3 -#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) -#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) - -#define S_CHAN 4 -#define M_CHAN 0x3 -#define V_CHAN(x) ((x) << S_CHAN) -#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) - -struct t3_rdma_init_attr { - u32 tid; - u32 qpid; - u32 pdid; - u32 scqid; - u32 rcqid; - u32 rq_addr; - u32 rq_size; - enum t3_mpa_attrs mpaattrs; - enum t3_qp_caps qpcaps; - u16 tcp_emss; - u32 ord; - u32 ird; - u64 qp_dma_addr; - u32 qp_dma_size; - enum rdma_init_rtr_types rtr_type; - u16 flags; - u16 rqe_count; - u32 irs; - u32 chan; -}; - -struct t3_rdma_init_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 qpid; /* 2 */ - __be32 pdid; - __be32 scqid; /* 3 */ - __be32 rcqid; - __be32 rq_addr; /* 4 */ - __be32 rq_size; - u8 mpaattrs; /* 5 */ - u8 qpcaps; - __be16 ulpdu_size; - __be16 flags_rtr_type; - __be16 rqe_count; - __be32 ord; /* 6 */ - __be32 ird; - __be64 qp_dma_addr; /* 7 */ - __be32 qp_dma_size; /* 8 */ - __be32 irs; -}; - -struct t3_genbit { - u64 flit[15]; - __be64 genbit; -}; - -struct t3_wq_in_err { - u64 flit[13]; - u64 err; -}; - -enum rdma_init_wr_flags { - MPA_INITIATOR = (1<<0), - PRIV_QP = (1<<1), -}; - -union t3_wr { - struct t3_send_wr send; - struct t3_rdma_write_wr write; - struct t3_rdma_read_wr read; - struct t3_receive_wr recv; - struct t3_fastreg_wr fastreg; - struct t3_pbl_frag pbl_frag; - struct t3_local_inv_wr local_inv; - struct t3_bind_mw_wr bind; - struct t3_bypass_wr bypass; - struct t3_rdma_init_wr init; - struct t3_modify_qp_wr qp_mod; - struct t3_genbit genbit; - struct t3_wq_in_err wq_in_err; - __be64 flit[16]; -}; - -#define T3_SQ_CQE_FLIT 13 -#define T3_SQ_COOKIE_FLIT 14 - -#define T3_RQ_COOKIE_FLIT 13 -#define T3_RQ_CQE_FLIT 14 - -static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) -{ - return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); -} - -enum t3_wr_hdr_bits { - T3_EOP = 1, - T3_SOP = 2, - T3_SOPEOP = T3_EOP|T3_SOP, -}; - -static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, - enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len, u8 sopeop) -{ - wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(sopeop) | - V_FW_RIWR_FLAGS(flags)); - wmb(); - wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | - V_FW_RIWR_TID(tid) | - V_FW_RIWR_LEN(len)); - /* 2nd gen bit... */ - ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); -} - -/* - * T3 ULP2_TX commands - */ -enum t3_utx_mem_op { - T3_UTX_MEM_READ = 2, - T3_UTX_MEM_WRITE = 3 -}; - -/* T3 MC7 RDMA TPT entry format */ - -enum tpt_mem_type { - TPT_NON_SHARED_MR = 0x0, - TPT_SHARED_MR = 0x1, - TPT_MW = 0x2, - TPT_MW_RELAXED_PROTECTION = 0x3 -}; - -enum tpt_addr_type { - TPT_ZBTO = 0, - TPT_VATO = 1 -}; - -enum tpt_mem_perm { - TPT_MW_BIND = 0x10, - TPT_LOCAL_READ = 0x8, - TPT_LOCAL_WRITE = 0x4, - TPT_REMOTE_READ = 0x2, - TPT_REMOTE_WRITE = 0x1 -}; - -struct tpt_entry { - __be32 valid_stag_pdid; - __be32 flags_pagesize_qpid; - - __be32 rsvd_pbl_addr; - __be32 len; - __be32 va_hi; - __be32 va_low_or_fbo; - - __be32 rsvd_bind_cnt_or_pstag; - __be32 rsvd_pbl_size; -}; - -#define S_TPT_VALID 31 -#define V_TPT_VALID(x) ((x) << S_TPT_VALID) -#define F_TPT_VALID V_TPT_VALID(1U) - -#define S_TPT_STAG_KEY 23 -#define M_TPT_STAG_KEY 0xFF -#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) -#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) - -#define S_TPT_STAG_STATE 22 -#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) -#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) - -#define S_TPT_STAG_TYPE 20 -#define M_TPT_STAG_TYPE 0x3 -#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) -#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) - -#define S_TPT_PDID 0 -#define M_TPT_PDID 0xFFFFF -#define V_TPT_PDID(x) ((x) << S_TPT_PDID) -#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) - -#define S_TPT_PERM 28 -#define M_TPT_PERM 0xF -#define V_TPT_PERM(x) ((x) << S_TPT_PERM) -#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) - -#define S_TPT_REM_INV_DIS 27 -#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) -#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) - -#define S_TPT_ADDR_TYPE 26 -#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) -#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) - -#define S_TPT_MW_BIND_ENABLE 25 -#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) -#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) - -#define S_TPT_PAGE_SIZE 20 -#define M_TPT_PAGE_SIZE 0x1F -#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) -#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) - -#define S_TPT_PBL_ADDR 0 -#define M_TPT_PBL_ADDR 0x1FFFFFFF -#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) -#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) - -#define S_TPT_QPID 0 -#define M_TPT_QPID 0xFFFFF -#define V_TPT_QPID(x) ((x) << S_TPT_QPID) -#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) - -#define S_TPT_PSTAG 0 -#define M_TPT_PSTAG 0xFFFFFF -#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) -#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) - -#define S_TPT_PBL_SIZE 0 -#define M_TPT_PBL_SIZE 0xFFFFF -#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) -#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) - -/* - * CQE defs - */ -struct t3_cqe { - __be32 header; - __be32 len; - union { - struct { - __be32 stag; - __be32 msn; - } rcqe; - struct { - u32 wrid_hi; - u32 wrid_low; - } scqe; - } u; -}; - -#define S_CQE_OOO 31 -#define M_CQE_OOO 0x1 -#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) -#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO) - -#define S_CQE_QPID 12 -#define M_CQE_QPID 0x7FFFF -#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID) -#define V_CQE_QPID(x) ((x)<<S_CQE_QPID) - -#define S_CQE_SWCQE 11 -#define M_CQE_SWCQE 0x1 -#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE) -#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE) - -#define S_CQE_GENBIT 10 -#define M_CQE_GENBIT 0x1 -#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT) -#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT) - -#define S_CQE_STATUS 5 -#define M_CQE_STATUS 0x1F -#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS) -#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS) - -#define S_CQE_TYPE 4 -#define M_CQE_TYPE 0x1 -#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE) -#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE) - -#define S_CQE_OPCODE 0 -#define M_CQE_OPCODE 0xF -#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE) -#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE) - -#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header))) -#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header))) -#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header))) -#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header))) -#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header))) -#define SQ_TYPE(x) (CQE_TYPE((x))) -#define RQ_TYPE(x) (!CQE_TYPE((x))) -#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) -#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) - -#define CQE_SEND_OPCODE(x)( \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) - -#define CQE_LEN(x) (be32_to_cpu((x).len)) - -/* used for RQ completion processing */ -#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag)) -#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn)) - -/* used for SQ completion processing */ -#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low) - -/* generic accessor macros */ -#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low) - -#define TPT_ERR_SUCCESS 0x0 -#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */ - /* STAG is offlimt, being 0, */ - /* or STAG_key mismatch */ -#define TPT_ERR_PDID 0x2 /* PDID mismatch */ -#define TPT_ERR_QPID 0x3 /* QPID mismatch */ -#define TPT_ERR_ACCESS 0x4 /* Invalid access right */ -#define TPT_ERR_WRAP 0x5 /* Wrap error */ -#define TPT_ERR_BOUND 0x6 /* base and bounds voilation */ -#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ - /* shared memory region */ -#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ - /* shared memory region */ -#define TPT_ERR_ECC 0x9 /* ECC error detected */ -#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */ - /* reading PSTAG for a MW */ - /* Invalidate */ -#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ - /* software error */ -#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */ -#define TPT_ERR_CRC 0x10 /* CRC error */ -#define TPT_ERR_MARKER 0x11 /* Marker error */ -#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ -#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */ -#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */ -#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ -#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */ -#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ -#define TPT_ERR_MSN 0x18 /* MSN error */ -#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */ -#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */ - /* or READ_REQ */ -#define TPT_ERR_MSN_GAP 0x1B -#define TPT_ERR_MSN_RANGE 0x1C -#define TPT_ERR_IRD_OVERFLOW 0x1D -#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ - /* software error */ -#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ - /* mismatch) */ - -struct t3_swsq { - __u64 wr_id; - struct t3_cqe cqe; - __u32 sq_wptr; - __be32 read_len; - int opcode; - int complete; - int signaled; -}; - -struct t3_swrq { - __u64 wr_id; - __u32 pbl_addr; -}; - -/* - * A T3 WQ implements both the SQ and RQ. - */ -struct t3_wq { - union t3_wr *queue; /* DMA accessible memory */ - dma_addr_t dma_addr; /* DMA address for HW */ - DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */ - u32 error; /* 1 once we go to ERROR */ - u32 qpid; - u32 wptr; /* idx to next available WR slot */ - u32 size_log2; /* total wq size */ - struct t3_swsq *sq; /* SW SQ */ - struct t3_swsq *oldest_read; /* tracks oldest pending read */ - u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ - u32 sq_rptr; /* pending wrs */ - u32 sq_size_log2; /* sq size */ - struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ - u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ - u32 rq_rptr; /* pending wrs */ - struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ - u32 rq_size_log2; /* rq size */ - u32 rq_addr; /* rq adapter address */ - void __iomem *doorbell; /* kernel db */ - u64 udb; /* user db if any */ - struct cxio_rdev *rdev; -}; - -struct t3_cq { - u32 cqid; - u32 rptr; - u32 wptr; - u32 size_log2; - dma_addr_t dma_addr; - DEFINE_DMA_UNMAP_ADDR(mapping); - struct t3_cqe *queue; - struct t3_cqe *sw_queue; - u32 sw_rptr; - u32 sw_wptr; -}; - -#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ - CQE_GENBIT(*cqe)) - -struct t3_cq_status_page { - u32 cq_err; -}; - -static inline int cxio_cq_in_error(struct t3_cq *cq) -{ - return ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err; -} - -static inline void cxio_set_cq_in_error(struct t3_cq *cq) -{ - ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err = 1; -} - -static inline void cxio_set_wq_in_error(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); -} - -static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - return NULL; -} - -static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c deleted file mode 100644 index 56a8ab6210cf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/moduleparam.h> - -#include <rdma/ib_verbs.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" -#include <rdma/cxgb3-abi.h> -#include "iwch.h" -#include "iwch_cm.h" - -#define DRV_VERSION "1.1" - -MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); -MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void open_rnic_dev(struct t3cdev *); -static void close_rnic_dev(struct t3cdev *); -static void iwch_event_handler(struct t3cdev *, u32, u32); - -struct cxgb3_client t3c_client = { - .name = "iw_cxgb3", - .add = open_rnic_dev, - .remove = close_rnic_dev, - .handlers = t3c_handlers, - .redirect = iwch_ep_redirect, - .event_handler = iwch_event_handler -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(dev_mutex); - -static void disable_dbs(struct iwch_dev *rnicp) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) - cxio_disable_wq_db(&qhp->wq); - xa_unlock_irq(&rnicp->qps); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) { - if (ring_db) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, - qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - } - xa_unlock_irq(&rnicp->qps); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - db_drop_task.work); - enable_dbs(rnicp, 1); -} - -static void rnic_init(struct iwch_dev *rnicp) -{ - pr_debug("%s iwch_dev %p\n", __func__, rnicp); - xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - - rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; - rnicp->attr.max_sge_per_wr = T3_MAX_SGE; - rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; - rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; - rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); - rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; - rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; - rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; - rnicp->attr.can_resize_wq = 0; - rnicp->attr.max_rdma_reads_per_qp = 8; - rnicp->attr.max_rdma_read_resources = - rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; - rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ - rnicp->attr.max_rdma_read_depth = - rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; - rnicp->attr.rq_overflow_handled = 0; - rnicp->attr.can_modify_ird = 0; - rnicp->attr.can_modify_ord = 0; - rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; - rnicp->attr.stag0_value = 1; - rnicp->attr.zbva_support = 1; - rnicp->attr.local_invalidate_fence = 1; - rnicp->attr.cq_overflow_detection = 1; - return; -} - -static void open_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *rnicp; - - pr_debug("%s t3cdev %p\n", __func__, tdev); - pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = ib_alloc_device(iwch_dev, ibdev); - if (!rnicp) { - pr_err("Cannot allocate ib device\n"); - return; - } - rnicp->rdev.ulp = rnicp; - rnicp->rdev.t3cdev_p = tdev; - - mutex_lock(&dev_mutex); - - if (cxio_rdev_open(&rnicp->rdev)) { - mutex_unlock(&dev_mutex); - pr_err("Unable to open CXIO rdev\n"); - ib_dealloc_device(&rnicp->ibdev); - return; - } - - rnic_init(rnicp); - - list_add_tail(&rnicp->entry, &dev_list); - mutex_unlock(&dev_mutex); - - if (iwch_register_device(rnicp)) { - pr_err("Unable to register device\n"); - close_rnic_dev(tdev); - } - pr_info("Initialized device %s\n", - pci_name(rnicp->rdev.rnic_info.pdev)); - return; -} - -static void close_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *dev, *tmp; - pr_debug("%s t3cdev %p\n", __func__, tdev); - mutex_lock(&dev_mutex); - list_for_each_entry_safe(dev, tmp, &dev_list, entry) { - if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - synchronize_net(); - cancel_delayed_work_sync(&dev->db_drop_task); - list_del(&dev->entry); - iwch_unregister_device(dev); - cxio_rdev_close(&dev->rdev); - WARN_ON(!xa_empty(&dev->cqs)); - WARN_ON(!xa_empty(&dev->qps)); - WARN_ON(!xa_empty(&dev->mrs)); - ib_dealloc_device(&dev->ibdev); - break; - } - } - mutex_unlock(&dev_mutex); -} - -static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) -{ - struct cxio_rdev *rdev = tdev->ulp; - struct iwch_dev *rnicp; - struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; - - if (!rdev) - return; - rnicp = rdev_to_iwch_dev(rdev); - switch (evt) { - case OFFLOAD_STATUS_DOWN: { - rdev->flags = CXIO_ERROR_FATAL; - synchronize_net(); - event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; - break; - } - case OFFLOAD_PORT_DOWN: { - event.event = IB_EVENT_PORT_ERR; - dispatch = 1; - break; - } - case OFFLOAD_PORT_UP: { - event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. - */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); - break; - } - } - - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } - - return; -} - -static int __init iwch_init_module(void) -{ - int err; - - err = cxio_hal_init(); - if (err) - return err; - err = iwch_cm_init(); - if (err) - return err; - cxio_register_ev_cb(iwch_ev_dispatch); - cxgb3_register_client(&t3c_client); - return 0; -} - -static void __exit iwch_exit_module(void) -{ - cxgb3_unregister_client(&t3c_client); - cxio_unregister_ev_cb(iwch_ev_dispatch); - iwch_cm_term(); - cxio_hal_exit(); -} - -module_init(iwch_init_module); -module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h deleted file mode 100644 index 310a937bffcf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_H__ -#define __IWCH_H__ - -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/xarray.h> -#include <linux/workqueue.h> - -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxgb3_offload.h" - -struct iwch_pd; -struct iwch_cq; -struct iwch_qp; -struct iwch_mr; - -struct iwch_rnic_attributes { - u32 max_qps; - u32 max_wrs; /* Max for any SQ/RQ */ - u32 max_sge_per_wr; - u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ - u32 max_cqs; - u32 max_cqes_per_cq; - u32 max_mem_regs; - u32 max_phys_buf_entries; /* for phys buf list */ - u32 max_pds; - - /* - * The memory page sizes supported by this RNIC. - * Bit position i in bitmap indicates page of - * size (4k)^i. Phys block list mode unsupported. - */ - u32 mem_pgsizes_bitmask; - u64 max_mr_size; - u8 can_resize_wq; - - /* - * The maximum number of RDMA Reads that can be outstanding - * per QP with this RNIC as the target. - */ - u32 max_rdma_reads_per_qp; - - /* - * The maximum number of resources used for RDMA Reads - * by this RNIC with this RNIC as the target. - */ - u32 max_rdma_read_resources; - - /* - * The max depth per QP for initiation of RDMA Read - * by this RNIC. - */ - u32 max_rdma_read_qp_depth; - - /* - * The maximum depth for initiation of RDMA Read - * operations by this RNIC on all QPs - */ - u32 max_rdma_read_depth; - u8 rq_overflow_handled; - u32 can_modify_ird; - u32 can_modify_ord; - u32 max_mem_windows; - u32 stag0_value; - u8 zbva_support; - u8 local_invalidate_fence; - u32 cq_overflow_detection; -}; - -struct iwch_dev { - struct ib_device ibdev; - struct cxio_rdev rdev; - u32 device_cap_flags; - struct iwch_rnic_attributes attr; - struct xarray cqs; - struct xarray qps; - struct xarray mrs; - struct list_head entry; - struct delayed_work db_drop_task; -}; - -static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct iwch_dev, ibdev); -} - -static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) -{ - return container_of(rdev, struct iwch_dev, rdev); -} - -static inline int t3b_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3B; -} - -static inline int t3a_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3A; -} - -static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) -{ - return xa_load(&rhp->cqs, cqid); -} - -static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) -{ - return xa_load(&rhp->qps, qpid); -} - -static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) -{ - return xa_load(&rhp->mrs, mmid); -} - -extern struct cxgb3_client t3c_client; -extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; -extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c deleted file mode 100644 index 0bca72cb4d9a..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ /dev/null @@ -1,2258 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/notifier.h> -#include <linux/inetdevice.h> - -#include <net/neighbour.h> -#include <net/netevent.h> -#include <net/route.h> - -#include "tcb.h" -#include "cxgb3_offload.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" - -static char *states[] = { - "idle", - "listen", - "connecting", - "mpa_wait_req", - "mpa_req_sent", - "mpa_req_rcvd", - "mpa_rep_sent", - "fpdu_mode", - "aborting", - "closing", - "moribund", - "dead", - NULL, -}; - -int peer2peer = 0; -module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); - -static int ep_timeout_secs = 60; -module_param(ep_timeout_secs, int, 0644); -MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=60)"); - -static int mpa_rev = 1; -module_param(mpa_rev, int, 0644); -MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. (default=1)"); - -static int markers_enabled = 0; -module_param(markers_enabled, int, 0644); -MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); - -static int crc_enabled = 1; -module_param(crc_enabled, int, 0644); -MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); - -static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0644); -MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); - -static int snd_win = 32 * 1024; -module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); - -static unsigned int nocong = 0; -module_param(nocong, uint, 0644); -MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); - -static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0644); -MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); - -static struct workqueue_struct *workq; - -static struct sk_buff_head rxq; - -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(struct timer_list *t); -static void connect_reply_upcall(struct iwch_ep *ep, int status); - -static void start_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (timer_pending(&ep->timer)) { - pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - get_ep(&ep->com); - ep->timer.expires = jiffies + ep_timeout_secs * HZ; - add_timer(&ep->timer); -} - -static void stop_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! ep %p state %u\n", - __func__, ep, ep->com.state); - return; - } - del_timer_sync(&ep->timer); - put_ep(&ep->com); -} - -static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = l2t_send(tdev, skb, l2e); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = cxgb3_ofld_send(tdev, skb); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) -{ - struct cpl_tid_release *req; - - skb = get_skb(skb, sizeof(*req), GFP_KERNEL); - if (!skb) - return; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - skb->priority = CPL_PRIORITY_SETUP; - iwch_cxgb3_ofld_send(tdev, skb); - return; -} - -int iwch_quiesce_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -int iwch_resume_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = 0; - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static void set_emss(struct iwch_ep *ep, u16 opt) -{ - pr_debug("%s ep %p opt %u\n", __func__, ep, opt); - ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; - if (G_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; - if (ep->emss < 128) - ep->emss = 128; - pr_debug("emss=%d\n", ep->emss); -} - -static enum iwch_ep_state state_read(struct iwch_ep_common *epc) -{ - unsigned long flags; - enum iwch_ep_state state; - - spin_lock_irqsave(&epc->lock, flags); - state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); - return state; -} - -static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - epc->state = new; -} - -static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); - __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); - return; -} - -static void *alloc_ep(int size, gfp_t gfp) -{ - struct iwch_ep_common *epc; - - epc = kzalloc(size, gfp); - if (epc) { - kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); - } - pr_debug("%s alloc ep %p\n", __func__, epc); - return epc; -} - -void __free_ep(struct kref *kref) -{ - struct iwch_ep *ep; - ep = container_of(container_of(kref, struct iwch_ep_common, kref), - struct iwch_ep, com); - pr_debug("%s ep %p state %s\n", - __func__, ep, states[state_read(&ep->com)]); - if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - } - kfree(ep); -} - -static void release_ep_resources(struct iwch_ep *ep) -{ - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - set_bit(RELEASE_RESOURCES, &ep->com.flags); - put_ep(&ep->com); -} - -static int status2errno(int status) -{ - switch (status) { - case CPL_ERR_NONE: - return 0; - case CPL_ERR_CONN_RESET: - return -ECONNRESET; - case CPL_ERR_ARP_MISS: - return -EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return -ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return -ENOMEM; - case CPL_ERR_CONN_EXIST: - return -EADDRINUSE; - default: - return -EIO; - } -} - -/* - * Try and reuse skbs already allocated... - */ -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) -{ - if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { - skb_trim(skb, 0); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - } - return skb; -} - -static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - return rt; -} - -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) -{ - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; -} - -static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p\n", __func__, dev); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_err("ARP failure during connect\n"); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - pr_debug("%s t3cdev %p\n", __func__, dev); - req->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(dev, skb); -} - -static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) -{ - struct cpl_close_con_req *req; - struct sk_buff *skb; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - struct cpl_abort_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(skb, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - req = skb_put_zero(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_connect(struct iwch_ep *ep) -{ - struct cpl_act_open_req *req; - struct sk_buff *skb; - u32 opt0h, opt0l, opt2; - unsigned int mtu_idx; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - skb->priority = CPL_PRIORITY_SETUP; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0h = htonl(opt0h); - req->opt0l = htonl(opt0l); - req->params = 0; - req->opt2 = htonl(opt2); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - - pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen); - - BUG_ON(skb_cloned(skb)); - - mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { - kfree_skb(skb); - skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - connect_reply_upcall(ep, -ENOMEM); - return; - } - } - skb_trim(skb, 0); - skb_reserve(skb, sizeof(*req)); - skb_put(skb, mpalen); - skb->priority = CPL_PRIORITY_DATA; - mpa = (struct mpa_message *) skb->data; - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; - - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); - return; -} - -static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb again. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(mpalen); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - - pr_debug("%s ep %p tid %d\n", __func__, ep, tid); - - dst_confirm(ep->dst); - - /* setup the hwtid for this connection */ - ep->hwtid = tid; - cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); - - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - /* dealloc the atid */ - cxgb3_free_atid(ep->com.tdev, ep->atid); - - /* start MPA negotiation */ - send_mpa_req(ep, skb); - - return 0; -} - -static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - pr_debug("%s ep %p\n", __FILE__, ep); - state_set(&ep->com, ABORTING); - send_abort(ep, skb, gfp); -} - -static void close_complete_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - if (ep->com.cm_id) { - pr_debug("close complete delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void peer_close_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_DISCONNECT; - if (ep->com.cm_id) { - pr_debug("peer close delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static void peer_abort_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - event.status = -ECONNRESET; - if (ep->com.cm_id) { - pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep, - ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_reply_upcall(struct iwch_ep *ep, int status) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p status %d\n", __func__, ep, status); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REPLY; - event.status = status; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); - - if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - } - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_request_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REQUEST; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.local_addr)); - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - event.provider_data = ep; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (state_read(&ep->parent_ep->com) != DEAD) { - get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } - put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; -} - -static void established_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_ESTABLISHED; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static int update_rx_credits(struct iwch_ep *ep, u32 credits) -{ - struct cpl_rx_data_ack *req; - struct sk_buff *skb; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("update_rx_credits - cannot alloc skb!\n"); - return 0; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); - skb->priority = CPL_PRIORITY_ACK; - iwch_cxgb3_ofld_send(ep->com.tdev, skb); - return credits; -} - -static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - int err; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - err = -EINVAL; - goto err; - } - - /* - * copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * if we don't even have the mpa message, then bail. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { - err = -EPROTO; - goto err; - } - if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { - err = -EPROTO; - goto err; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - err = -EPROTO; - goto err; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - err = -EPROTO; - goto err; - } - - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - if (mpa->flags & MPA_REJECT) { - err = -ECONNREFUSED; - goto err; - } - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. And - * the MPA header is valid. - */ - state_set(&ep->com, FPDU_MODE); - ep->mpa_attr.initiator = 1; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; - - /* bind QP and TID with INIT_WR */ - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err; - - if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep); - } - - goto out; -err: - abort_connection(ep, skb, GFP_KERNEL); -out: - connect_reply_upcall(ep, err); - return; -} - -static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - - /* - * Copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * If we don't even have the mpa message, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* - * Validate MPA Header. - */ - if (mpa->revision != mpa_rev) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. - */ - ep->mpa_attr.initiator = 0; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); - return; -} - -static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_rx_data *hdr = cplhdr(skb); - unsigned int dlen = ntohs(hdr->len); - - pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen); - - skb_pull(skb, sizeof(*hdr)); - skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - - switch (state_read(&ep->com)) { - case MPA_REQ_SENT: - process_mpa_reply(ep, skb); - break; - case MPA_REQ_WAIT: - process_mpa_request(ep, skb); - break; - case MPA_REP_SENT: - break; - default: - pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ - break; - } - - /* update RX credits */ - update_rx_credits(ep, dlen); - - return CPL_RET_BUF_DONE; -} - -/* - * Upcall from the adapter indicating data has been transmitted. - * For us its just the single MPA request or reply. We can now free - * the skb holding the mpa message. - */ -static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_wr_ack *hdr = cplhdr(skb); - unsigned int credits = ntohs(hdr->credits); - unsigned long flags; - int post_zb = 0; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - - if (credits == 0) { - pr_debug("%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - BUG_ON(credits != 1); - dst_confirm(ep->dst); - if (!ep->mpa_skb) { - pr_debug("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->mpa_attr.initiator) { - pr_debug("%s initiator ep %p state %u\n", - __func__, ep, ep->com.state); - if (peer2peer && ep->com.state == FPDU_MODE) - post_zb = 1; - } else { - pr_debug("%s responder ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->com.state == MPA_REQ_RCVD) { - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - } - } - } else { - pr_debug("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, ep->com.state); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (post_zb) - iwch_post_zb_read(ep); - return CPL_RET_BUF_DONE; -} - -static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* - * We get 2 abort replies from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case ABORTING: - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - default: - pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} - -static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, - status2errno(rpl->status)); - connect_reply_upcall(ep, status2errno(rpl->status)); - state_set(&ep->com, DEAD); - if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) - release_tid(ep->com.tdev, GET_TID(rpl), NULL); - cxgb3_free_atid(ep->com.tdev, ep->atid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; -} - -static int listen_start(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_pass_open_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("t3c_listen_start failed to alloc skb!\n"); - return -ENOMEM; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); - req->local_port = ep->com.local_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_port = 0; - req->peer_ip = 0; - req->peer_netmask = 0; - req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); - req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); - req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); - - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_pass_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %d error %d\n", __func__, ep, - rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - - return CPL_RET_BUF_DONE; -} - -static int listen_stop(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listserv_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->cpu_idx = 0; - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, - void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - return CPL_RET_BUF_DONE; -} - -static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) -{ - struct cpl_pass_accept_rpl *rpl; - unsigned int mtu_idx; - u32 opt0h, opt0l, opt2; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); - skb_get(skb); - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - - rpl = cplhdr(skb); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(opt0h); - rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(opt2); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ - skb->priority = CPL_PRIORITY_SETUP; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - - return; -} - -static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, - peer_ip); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); - - if (tdev->type != T3A) - release_tid(tdev, hwtid, skb); - else { - struct cpl_pass_accept_rpl *rpl; - - rpl = cplhdr(skb); - skb->priority = CPL_PRIORITY_SETUP; - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(F_TCAM_BYPASS); - rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); - rpl->opt2 = 0; - rpl->rsvd = rpl->opt2; - iwch_cxgb3_ofld_send(tdev, skb); - } -} - -static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *child_ep, *parent_ep = ctx; - struct cpl_pass_accept_req *req = cplhdr(skb); - unsigned int hwtid = GET_TID(req); - struct dst_entry *dst; - struct l2t_entry *l2t; - struct rtable *rt; - struct iff_mac tim; - - pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - - if (state_read(&parent_ep->com) != LISTEN) { - pr_err("%s - listening ep not in LISTEN\n", __func__); - goto reject; - } - - /* - * Find the netdev for this connection request. - */ - tim.mac_addr = req->dst_mac; - tim.vlan_tag = ntohs(req->vlan_tag); - if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac); - goto reject; - } - - /* Find output route */ - rt = find_route(tdev, - req->local_ip, - req->peer_ip, - req->local_port, - req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); - if (!rt) { - pr_err("%s - failed to find dst entry!\n", __func__); - goto reject; - } - dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); - if (!l2t) { - pr_err("%s - failed to allocate l2t entry!\n", __func__); - dst_release(dst); - goto reject; - } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - pr_err("%s - failed to allocate ep entry!\n", __func__); - l2t_release(tdev, l2t); - dst_release(dst); - goto reject; - } - state_set(&child_ep->com, CONNECTING); - child_ep->com.tdev = tdev; - child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = AF_INET; - child_ep->com.local_addr.sin_port = req->local_port; - child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = AF_INET; - child_ep->com.remote_addr.sin_port = req->peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; - get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; - child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); - child_ep->l2t = l2t; - child_ep->dst = dst; - child_ep->hwtid = hwtid; - timer_setup(&child_ep->timer, ep_timeout, 0); - cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); - accept_cr(child_ep, req->peer_ip, skb); - goto out; -reject: - reject_cr(tdev, hwtid, req->peer_ip, skb); -out: - return CPL_RET_BUF_DONE; -} - -static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_pass_establish *req = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); - start_ep_timer(ep); - - return CPL_RET_BUF_DONE; -} - -static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int disconnect = 1; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - dst_confirm(ep->dst); - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case MPA_REQ_WAIT: - __state_set(&ep->com, CLOSING); - break; - case MPA_REQ_SENT: - __state_set(&ep->com, CLOSING); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REP_SENT: - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case FPDU_MODE: - start_ep_timer(ep); - __state_set(&ep->com, CLOSING); - attrs.next_state = IWCH_QP_STATE_CLOSING; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - break; - case ABORTING: - disconnect = 0; - break; - case CLOSING: - __state_set(&ep->com, MORIBUND); - disconnect = 0; - break; - case MORIBUND: - stop_ep_timer(ep); - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - disconnect = 0; - break; - case DEAD: - disconnect = 0; - break; - default: - BUG_ON(1); - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (disconnect) - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - -static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct iwch_ep *ep = ctx; - struct cpl_abort_rpl *rpl; - struct sk_buff *rpl_skb; - struct iwch_qp_attributes attrs; - int ret; - int release = 0; - unsigned long flags; - - if (is_neg_adv_abort(req->status)) { - pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - - /* - * We get 2 peer aborts from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state); - switch (ep->com.state) { - case CONNECTING: - break; - case MPA_REQ_WAIT: - stop_ep_timer(ep); - break; - case MPA_REQ_SENT: - stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MORIBUND: - case CLOSING: - stop_ep_timer(ep); - /*FALLTHROUGH*/ - case FPDU_MODE: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - ret = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (ret) - pr_err("%s - qp <- error failed!\n", __func__); - } - peer_abort_upcall(ep); - break; - case ABORTING: - break; - case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); - return CPL_RET_BUF_DONE; - default: - BUG_ON(1); - break; - } - dst_confirm(ep->dst); - if (ep->com.state != ABORTING) { - __state_set(&ep->com, DEAD); - release = 1; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - pr_err("%s - cannot allocate skb!\n", __func__); - release = 1; - goto out; - } - rpl_skb->priority = CPL_PRIORITY_DATA; - rpl = skb_put(rpl_skb, sizeof(*rpl)); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); -out: - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case CLOSING: - __state_set(&ep->com, MORIBUND); - break; - case MORIBUND: - stop_ep_timer(ep); - if ((ep->com.cm_id) && (ep->com.qp)) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - case ABORTING: - case DEAD: - break; - default: - BUG_ON(1); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * T3A does 3 things when a TERM is received: - * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet - * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcode cqe into the associated CQ. - * - * For (1), we save the message in the qp for later consumer consumption. - * For (2), we move the QP into TERMINATE, post a QP event and disconnect. - * For (3), we toss the CQE in cxio_poll_cq(). - * - * terminate() handles case (1)... - */ -static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - - if (state_read(&ep->com) != FPDU_MODE) - return CPL_RET_BUF_DONE; - - pr_debug("%s ep %p\n", __func__, ep); - skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; - return CPL_RET_BUF_DONE; -} - -static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_rdma_ec_status *rep = cplhdr(skb); - struct iwch_ep *ep = ctx; - - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, - rep->status); - if (rep->status) { - struct iwch_qp_attributes attrs; - - pr_err("%s BAD CLOSE - Aborting tid %u\n", - __func__, ep->hwtid); - stop_ep_timer(ep); - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - abort_connection(ep, NULL, GFP_KERNEL); - } - return CPL_RET_BUF_DONE; -} - -static void ep_timeout(struct timer_list *t) -{ - struct iwch_ep *ep = from_timer(ep, t, timer); - struct iwch_qp_attributes attrs; - unsigned long flags; - int abort = 1; - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); - switch (ep->com.state) { - case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); - connect_reply_upcall(ep, -ETIMEDOUT); - break; - case MPA_REQ_WAIT: - __state_set(&ep->com, ABORTING); - break; - case CLOSING: - case MORIBUND: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - __state_set(&ep->com, ABORTING); - break; - default: - WARN(1, "%s unexpected state ep %p state %u\n", - __func__, ep, ep->com.state); - abort = 0; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (abort) - abort_connection(ep, NULL, GFP_ATOMIC); - put_ep(&ep->com); -} - -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct iwch_ep *ep = to_ep(cm_id); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; - } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); - else { - send_mpa_reject(ep, pdata, pdata_len); - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - } - put_ep(&ep->com); - return 0; -} - -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - int err; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - struct iwch_ep *ep = to_ep(cm_id); - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - err = -ECONNRESET; - goto err; - } - - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - BUG_ON(!qp); - - if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || - (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { - abort_connection(ep, NULL, GFP_KERNEL); - err = -EINVAL; - goto err; - } - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; - - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ird == 0) - ep->ird = 1; - - pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err1; - - /* if needed, wait for wr_ack */ - if (iwch_rqes_posted(qp)) { - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err1; - } - - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) - goto err1; - - - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - put_ep(&ep->com); - return 0; -err1: - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); -err: - put_ep(&ep->com); - return err; -} - -static int is_loopback_dst(struct iw_cm_id *cm_id) -{ - struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); - if (!dev) - return 0; - dev_put(dev); - return 1; -} - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_ep *ep; - struct rtable *rt; - int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->m_remote_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto out; - } - - if (is_loopback_dst(cm_id)) { - err = -ENOSYS; - goto out; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto out; - } - timer_setup(&ep->timer, ep_timeout, 0); - ep->plen = conn_param->private_data_len; - if (ep->plen) - memcpy(ep->mpa_pkt + sizeof(struct mpa_message), - conn_param->private_data, ep->plen); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ord == 0) - ep->ord = 1; - - ep->com.tdev = h->rdev.t3cdev_p; - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = get_qhp(h, conn_param->qpn); - BUG_ON(!ep->com.qp); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, - ep->com.qp, cm_id); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->atid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, IPTOS_LOWDELAY); - if (!rt) { - pr_err("%s - cannot find route\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, - &raddr->sin_addr.s_addr); - if (!ep->l2t) { - pr_err("%s - cannot alloc l2e\n", __func__); - err = -ENOMEM; - goto fail4; - } - - state_set(&ep->com, CONNECTING); - ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, - sizeof(ep->com.remote_addr)); - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - l2t_release(h->rdev.t3cdev_p, ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb3_free_atid(ep->com.tdev, ep->atid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -out: - return err; -} - -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int err = 0; - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_listen_ep *ep; - - - might_sleep(); - - if (cm_id->m_local_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto fail1; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto fail1; - } - pr_debug("%s ep %p\n", __func__, ep); - ep->com.tdev = h->rdev.t3cdev_p; - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - - /* - * Allocate a server TID. - */ - ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->stid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - state_set(&ep->com, LISTEN); - err = listen_start(ep); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (!err) { - cm_id->provider_data = ep; - goto out; - } -fail3: - cxgb3_free_stid(ep->com.tdev, ep->stid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -fail1: -out: - return err; -} - -int iwch_destroy_listen(struct iw_cm_id *cm_id) -{ - int err; - struct iwch_listen_ep *ep = to_listen_ep(cm_id); - - pr_debug("%s ep %p\n", __func__, ep); - - might_sleep(); - state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; - err = listen_stop(ep); - if (err) - goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); - cxgb3_free_stid(ep->com.tdev, ep->stid); -done: - err = ep->com.rpl_err; - cm_id->rem_ref(cm_id); - put_ep(&ep->com); - return err; -} - -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) -{ - int ret=0; - unsigned long flags; - int close = 0; - int fatal = 0; - struct t3cdev *tdev; - struct cxio_rdev *rdev; - - spin_lock_irqsave(&ep->com.lock, flags); - - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, - states[ep->com.state], abrupt); - - tdev = (struct t3cdev *)ep->com.tdev; - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - fatal = 1; - close_complete_upcall(ep); - ep->com.state = DEAD; - } - switch (ep->com.state) { - case MPA_REQ_WAIT: - case MPA_REQ_SENT: - case MPA_REQ_RCVD: - case MPA_REP_SENT: - case FPDU_MODE: - close = 1; - if (abrupt) - ep->com.state = ABORTING; - else { - ep->com.state = CLOSING; - start_ep_timer(ep); - } - set_bit(CLOSE_SENT, &ep->com.flags); - break; - case CLOSING: - if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; - } - break; - case MORIBUND: - case ABORTING: - case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); - break; - default: - BUG(); - break; - } - - spin_unlock_irqrestore(&ep->com.lock, flags); - if (close) { - if (abrupt) - ret = send_abort(ep, NULL, gfp); - else - ret = send_halfclose(ep, gfp); - if (ret) - fatal = 1; - } - if (fatal) - release_ep_resources(ep); - return ret; -} - -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, - struct l2t_entry *l2t) -{ - struct iwch_ep *ep = ctx; - - if (ep->dst != old) - return 0; - - pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, - l2t); - dst_hold(new); - l2t_release(ep->com.tdev, ep->l2t); - ep->l2t = l2t; - dst_release(old); - ep->dst = new; - return 1; -} - -/* - * All the CM events are handled on a work queue to have a safe context. - * These are the real handlers that are called from the work queue. - */ -static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = act_establish, - [CPL_ACT_OPEN_RPL] = act_open_rpl, - [CPL_RX_DATA] = rx_data, - [CPL_TX_DMA_ACK] = tx_ack, - [CPL_ABORT_RPL_RSS] = abort_rpl, - [CPL_ABORT_RPL] = abort_rpl, - [CPL_PASS_OPEN_RPL] = pass_open_rpl, - [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, - [CPL_PASS_ACCEPT_REQ] = pass_accept_req, - [CPL_PASS_ESTABLISH] = pass_establish, - [CPL_PEER_CLOSE] = peer_close, - [CPL_ABORT_REQ_RSS] = peer_abort, - [CPL_CLOSE_CON_RPL] = close_con_rpl, - [CPL_RDMA_TERMINATE] = terminate, - [CPL_RDMA_EC_STATUS] = ec_status, -}; - -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - -static DECLARE_WORK(skb_work, process_work); - -static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep_common *epc = ctx; - - get_ep(epc); - - /* - * Save ctx and tdev in the skb->cb area. - */ - *((void **) skb->cb) = ctx; - *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; - - /* - * Queue the skb and schedule the worker thread. - */ - skb_queue_tail(&rxq, skb); - queue_work(workq, &skb_work); - return 0; -} - -static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_set_tcb_rpl *rpl = cplhdr(skb); - - if (rpl->status != CPL_ERR_NONE) { - pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", - rpl->status, GET_TID(rpl)); - } - return CPL_RET_BUF_DONE; -} - -/* - * All upcalls from the T3 Core go to sched() to schedule the - * processing on a work queue. - */ -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = sched, - [CPL_ACT_OPEN_RPL] = sched, - [CPL_RX_DATA] = sched, - [CPL_TX_DMA_ACK] = sched, - [CPL_ABORT_RPL_RSS] = sched, - [CPL_ABORT_RPL] = sched, - [CPL_PASS_OPEN_RPL] = sched, - [CPL_CLOSE_LISTSRV_RPL] = sched, - [CPL_PASS_ACCEPT_REQ] = sched, - [CPL_PASS_ESTABLISH] = sched, - [CPL_PEER_CLOSE] = sched, - [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, - [CPL_RDMA_TERMINATE] = sched, - [CPL_RDMA_EC_STATUS] = sched, - [CPL_SET_TCB_RPL] = set_tcb_rpl, -}; - -int __init iwch_cm_init(void) -{ - skb_queue_head_init(&rxq); - - workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); - if (!workq) - return -ENOMEM; - - return 0; -} - -void __exit iwch_cm_term(void) -{ - flush_workqueue(workq); - destroy_workqueue(workq); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h deleted file mode 100644 index cc7fe644d260..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _IWCH_CM_H_ -#define _IWCH_CM_H_ - -#include <linux/inet.h> -#include <linux/wait.h> -#include <linux/spinlock.h> -#include <linux/kref.h> - -#include <rdma/ib_verbs.h> -#include <rdma/iw_cm.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" - -#define MPA_KEY_REQ "MPA ID Req Frame" -#define MPA_KEY_REP "MPA ID Rep Frame" - -#define MPA_MAX_PRIVATE_DATA 256 -#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ -#define MPA_REJECT 0x20 -#define MPA_CRC 0x40 -#define MPA_MARKERS 0x80 -#define MPA_FLAGS_MASK 0xE0 - -#define put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - WARN_ON(kref_read(&((ep)->kref)) < 1); \ - kref_put(&((ep)->kref), __free_ep); \ -} - -#define get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - kref_get(&((ep)->kref)); \ -} - -struct mpa_message { - u8 key[16]; - u8 flags; - u8 revision; - __be16 private_data_size; - u8 private_data[0]; -}; - -struct terminate_message { - u8 layer_etype; - u8 ecode; - __be16 hdrct_rsvd; - u8 len_hdrs[0]; -}; - -#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) - -enum iwch_layers_types { - LAYER_RDMAP = 0x00, - LAYER_DDP = 0x10, - LAYER_MPA = 0x20, - RDMAP_LOCAL_CATA = 0x00, - RDMAP_REMOTE_PROT = 0x01, - RDMAP_REMOTE_OP = 0x02, - DDP_LOCAL_CATA = 0x00, - DDP_TAGGED_ERR = 0x01, - DDP_UNTAGGED_ERR = 0x02, - DDP_LLP = 0x03 -}; - -enum iwch_rdma_ecodes { - RDMAP_INV_STAG = 0x00, - RDMAP_BASE_BOUNDS = 0x01, - RDMAP_ACC_VIOL = 0x02, - RDMAP_STAG_NOT_ASSOC = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_VERS = 0x05, - RDMAP_INV_OPCODE = 0x06, - RDMAP_STREAM_CATA = 0x07, - RDMAP_GLOBAL_CATA = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum iwch_ddp_ecodes { - DDPT_INV_STAG = 0x00, - DDPT_BASE_BOUNDS = 0x01, - DDPT_STAG_NOT_ASSOC = 0x02, - DDPT_TO_WRAP = 0x03, - DDPT_INV_VERS = 0x04, - DDPU_INV_QN = 0x01, - DDPU_INV_MSN_NOBUF = 0x02, - DDPU_INV_MSN_RANGE = 0x03, - DDPU_INV_MO = 0x04, - DDPU_MSG_TOOBIG = 0x05, - DDPU_INV_VERS = 0x06 -}; - -enum iwch_mpa_ecodes { - MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 -}; - -enum iwch_ep_state { - IDLE = 0, - LISTEN, - CONNECTING, - MPA_REQ_WAIT, - MPA_REQ_SENT, - MPA_REQ_RCVD, - MPA_REP_SENT, - FPDU_MODE, - ABORTING, - CLOSING, - MORIBUND, - DEAD, -}; - -enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = 0, - ABORT_REQ_IN_PROGRESS = 1, - RELEASE_RESOURCES = 2, - CLOSE_SENT = 3, -}; - -struct iwch_ep_common { - struct iw_cm_id *cm_id; - struct iwch_qp *qp; - struct t3cdev *tdev; - enum iwch_ep_state state; - struct kref kref; - spinlock_t lock; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; - unsigned long flags; -}; - -struct iwch_listen_ep { - struct iwch_ep_common com; - unsigned int stid; - int backlog; -}; - -struct iwch_ep { - struct iwch_ep_common com; - struct iwch_ep *parent_ep; - struct timer_list timer; - unsigned int atid; - u32 hwtid; - u32 snd_seq; - u32 rcv_seq; - struct l2t_entry *l2t; - struct dst_entry *dst; - struct sk_buff *mpa_skb; - struct iwch_mpa_attributes mpa_attr; - unsigned int mpa_pkt_len; - u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; - u8 tos; - u16 emss; - u16 plen; - u32 ird; - u32 ord; -}; - -static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; -} - -/* CM prototypes */ - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); -int iwch_destroy_listen(struct iw_cm_id *cm_id); -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp); -int iwch_quiesce_tid(struct iwch_ep *ep); -int iwch_resume_tid(struct iwch_ep *ep); -void __free_ep(struct kref *kref); -void iwch_rearp(struct iwch_ep *ep); -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); - -int __init iwch_cm_init(void); -void __exit iwch_cm_term(void); -extern int peer2peer; - -#endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c deleted file mode 100644 index a098c0140580..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "iwch_provider.h" -#include "iwch.h" - -static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct iwch_qp *qhp, struct ib_wc *wc) -{ - struct t3_wq *wq = qhp ? &qhp->wq : NULL; - struct t3_cqe cqe; - u32 credit = 0; - u8 cqe_flushed; - u64 cookie; - int ret = 1; - - ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, - &credit); - if (t3a_device(chp->rhp) && credit) { - pr_debug("%s updating %d cq credits on id %d\n", __func__, - credit, chp->cq.cqid); - cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); - } - - if (ret) { - ret = -EAGAIN; - goto out; - } - ret = 1; - - wc->wr_id = cookie; - wc->qp = qhp ? &qhp->ibqp : NULL; - wc->vendor_err = CQE_STATUS(cqe); - wc->wc_flags = 0; - - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - __func__, - CQE_QPID(cqe), CQE_TYPE(cqe), - CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), - CQE_WRID_LOW(cqe), (unsigned long long)cookie); - - if (CQE_TYPE(cqe) == 0) { - if (!CQE_STATUS(cqe)) - wc->byte_len = CQE_LEN(cqe); - else - wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || - CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { - wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - } - } else { - switch (CQE_OPCODE(cqe)) { - case T3_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case T3_READ_REQ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = CQE_LEN(cqe); - break; - case T3_SEND: - case T3_SEND_WITH_SE: - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: - wc->opcode = IB_WC_SEND; - break; - case T3_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case T3_FAST_REGISTER: - wc->opcode = IB_WC_REG_MR; - break; - default: - pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", - CQE_OPCODE(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - goto out; - } - } - - if (cqe_flushed) - wc->status = IB_WC_WR_FLUSH_ERR; - else { - - switch (CQE_STATUS(cqe)) { - case TPT_ERR_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case TPT_ERR_STAG: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_PDID: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_WRAP: - wc->status = IB_WC_GENERAL_ERR; - break; - case TPT_ERR_BOUND: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - wc->status = IB_WC_MW_BIND_ERR; - break; - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - case TPT_ERR_OPCODE: - wc->status = IB_WC_FATAL_ERR; - break; - case TPT_ERR_SWFLUSH: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - default: - pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", - CQE_STATUS(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - } - } -out: - return ret; -} - -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) -{ - struct iwch_qp *qhp; - struct t3_cqe *rd_cqe; - int ret; - - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (qhp) { - spin_lock(&qhp->lock); - ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); - spin_unlock(&qhp->lock); - } else { - ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); - } - return ret; -} - -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - unsigned long flags; - int npolled; - int err = 0; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - - spin_lock_irqsave(&chp->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { - - /* - * Because T3 can post CQEs that are _not_ associated - * with a WR, we might have to poll again after removing - * one of these. - */ - do { - err = iwch_poll_cq_one(rhp, chp, wc + npolled); - } while (err == -EAGAIN); - if (err <= 0) - break; - } - spin_unlock_irqrestore(&chp->lock, flags); - - if (err < 0) - return err; - else { - return npolled; - } -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c deleted file mode 100644 index 9d356c1301c7..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/gfp.h> -#include <linux/mman.h> -#include <net/sock.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, - struct respQ_msg_t *rsp_msg, - enum ib_event_type ib_event, - int send_term) -{ - struct ib_event event; - struct iwch_qp_attributes attrs; - struct iwch_qp *qhp; - unsigned long flag; - - xa_lock(&rnicp->qps); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - - if (!qhp) { - pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", - __func__, CQE_STATUS(rsp_msg->cqe), - CQE_QPID(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || - (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { - pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n", - __func__, - qhp->attr.state, qhp->wq.qpid, - CQE_STATUS(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - - atomic_inc(&qhp->refcnt); - xa_unlock(&rnicp->qps); - - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } - - event.event = ib_event; - event.device = chp->ibcq.device; - if (ib_event == IB_EVENT_CQ_ERR) - event.element.cq = &chp->ibcq; - else - event.element.qp = &qhp->ibqp; - - if (qhp->ibqp.event_handler) - (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); -} - -void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) -{ - struct iwch_dev *rnicp; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - struct iwch_cq *chp; - struct iwch_qp *qhp; - u32 cqid = RSPQ_CQID(rsp_msg); - unsigned long flag; - - rnicp = (struct iwch_dev *) rdev_p->ulp; - xa_lock(&rnicp->qps); - chp = get_chp(rnicp, cqid); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - if (!chp || !qhp) { - pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - cqid, CQE_QPID(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), - CQE_WRID_LOW(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - goto out; - } - iwch_qp_add_ref(&qhp->ibqp); - atomic_inc(&chp->refcnt); - xa_unlock(&rnicp->qps); - - /* - * 1) completion of our sending a TERMINATE. - * 2) incoming TERMINATE message. - */ - if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && - (CQE_STATUS(rsp_msg->cqe) == 0)) { - if (SQ_TYPE(rsp_msg->cqe)) { - pr_debug("%s QPID 0x%x ep %p disconnecting\n", - __func__, qhp->wq.qpid, qhp->ep); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } else { - pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__, - qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, - IB_EVENT_QP_REQ_ERR, 0); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } - goto done; - } - - /* Bad incoming Read request */ - if (SQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - /* Bad incoming write */ - if (RQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - switch (CQE_STATUS(rsp_msg->cqe)) { - - /* Completion Events */ - case TPT_ERR_SUCCESS: - - /* - * Confirm the destination entry if this is a RECV completion. - */ - if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) - dst_confirm(qhp->ep->dst); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - break; - - case TPT_ERR_STAG: - case TPT_ERR_PDID: - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - case TPT_ERR_WRAP: - case TPT_ERR_BOUND: - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); - break; - - /* Device Fatal Errors */ - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); - break; - - /* QP Fatal Errors */ - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_PBL_ADDR_BOUND: - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_OPCODE: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_RQE_ADDR_BOUND: - case TPT_ERR_IRD_OVERFLOW: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - - default: - pr_err("Unknown T3 status 0x%x QPID 0x%x\n", - CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - } -done: - if (atomic_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); - iwch_qp_rem_ref(&qhp->ibqp); -out: - dev_kfree_skb_irq(skb); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c deleted file mode 100644 index ce0f2741821d..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/slab.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxio_resource.h" -#include "iwch.h" -#include "iwch_provider.h" - -static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) -{ - u32 mmid; - - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); -} - -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift) -{ - u32 stag; - int ret; - - if (cxio_register_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - return ret; -} - -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) -{ - mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, - npages << 3); - - if (!mhp->attr.pbl_addr) - return -ENOMEM; - - mhp->attr.pbl_size = npages; - - return 0; -} - -void iwch_free_pbl(struct iwch_mr *mhp) -{ - cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); -} - -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) -{ - return cxio_write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (offset << 3), npages); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c deleted file mode 100644 index dcf02ec02810..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/device.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/list.h> -#include <linux/sched/mm.h> -#include <linux/spinlock.h> -#include <linux/ethtool.h> -#include <linux/rtnetlink.h> -#include <linux/inetdevice.h> -#include <linux/slab.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> -#include <rdma/ib_user_verbs.h> -#include <rdma/uverbs_ioctl.h> - -#include "cxio_hal.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" -#include <rdma/cxgb3-abi.h> -#include "common.h" - -static void iwch_dealloc_ucontext(struct ib_ucontext *context) -{ - struct iwch_dev *rhp = to_iwch_dev(context->device); - struct iwch_ucontext *ucontext = to_iwch_ucontext(context); - struct iwch_mm_entry *mm, *tmp; - - pr_debug("%s context %p\n", __func__, context); - list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) - kfree(mm); - cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); -} - -static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ucontext->device; - struct iwch_ucontext *context = to_iwch_ucontext(ucontext); - struct iwch_dev *rhp = to_iwch_dev(ibdev); - - pr_debug("%s ibdev %p\n", __func__, ibdev); - cxio_init_ucontext(&rhp->rdev, &context->uctx); - INIT_LIST_HEAD(&context->mmaps); - spin_lock_init(&context->mmap_lock); - return 0; -} - -static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct iwch_cq *chp; - - pr_debug("%s ib_cq %p\n", __func__, ib_cq); - chp = to_iwch_cq(ib_cq); - - xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - atomic_dec(&chp->refcnt); - wait_event(chp->wait, !atomic_read(&chp->refcnt)); - - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); -} - -static int iwch_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ibcq->device; - int entries = attr->cqe; - struct iwch_dev *rhp = to_iwch_dev(ibcq->device); - struct iwch_cq *chp = to_iwch_cq(ibcq); - struct iwch_create_cq_resp uresp; - struct iwch_create_cq_req ureq; - static int warned; - size_t resplen; - - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); - if (attr->flags) - return -EINVAL; - - if (udata) { - if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return -EFAULT; - - chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; - } - } - - if (t3a_device(rhp)) { - - /* - * T3A: Add some fluff to handle extra CQEs inserted - * for various errors. - * Additional CQE possibilities: - * TERMINATE, - * incoming RDMA WRITE Failures - * incoming RDMA READ REQUEST FAILUREs - * NOTE: We cannot ensure the CQ won't overflow. - */ - entries += 16; - } - entries = roundup_pow_of_two(entries); - chp->cq.size_log2 = ilog2(entries); - - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) - return -ENOMEM; - - chp->rhp = rhp; - chp->ibcq.cqe = 1 << chp->cq.size_log2; - spin_lock_init(&chp->lock); - spin_lock_init(&chp->comp_handler_lock); - atomic_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); - if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - return -ENOMEM; - } - - if (udata) { - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct iwch_ucontext, ibucontext); - - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - iwch_destroy_cq(&chp->ibcq, udata); - return -ENOMEM; - } - uresp.cqid = chp->cq.cqid; - uresp.size_log2 = chp->cq.size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof(uresp)) { - if (!warned++) - pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof(struct t3_cqe)); - resplen = sizeof(struct iwch_create_cq_resp_v0); - } else { - mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * - sizeof(struct t3_cqe)); - uresp.memsize = mm->len; - uresp.reserved = 0; - resplen = sizeof(uresp); - } - if (ib_copy_to_udata(udata, &uresp, resplen)) { - kfree(mm); - iwch_destroy_cq(&chp->ibcq, udata); - return -EFAULT; - } - insert_mmap(ucontext, mm); - } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", - chp->cq.cqid, chp, (1 << chp->cq.size_log2), - &chp->cq.dma_addr); - return 0; -} - -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - enum t3_cq_opcode cq_op; - int err; - unsigned long flag; - u32 rptr; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_op = CQ_ARM_SE; - else - cq_op = CQ_ARM_AN; - if (chp->user_rptr_addr) { - if (get_user(rptr, chp->user_rptr_addr)) - return -EFAULT; - spin_lock_irqsave(&chp->lock, flag); - chp->cq.rptr = rptr; - } else - spin_lock_irqsave(&chp->lock, flag); - pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr); - err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); - spin_unlock_irqrestore(&chp->lock, flag); - if (err < 0) - pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); - if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - err = 0; - return err; -} - -static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - int len = vma->vm_end - vma->vm_start; - u32 key = vma->vm_pgoff << PAGE_SHIFT; - struct cxio_rdev *rdev_p; - int ret = 0; - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext; - u64 addr; - - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, - key, len); - - if (vma->vm_start & (PAGE_SIZE-1)) { - return -EINVAL; - } - - rdev_p = &(to_iwch_dev(context->device)->rdev); - ucontext = to_iwch_ucontext(context); - - mm = remove_mmap(ucontext, key, len); - if (!mm) - return -EINVAL; - addr = mm->addr; - kfree(mm); - - if ((addr >= rdev_p->rnic_info.udbell_physbase) && - (addr < (rdev_p->rnic_info.udbell_physbase + - rdev_p->rnic_info.udbell_len))) { - - /* - * Map T3 DB register. - */ - if (vma->vm_flags & VM_READ) { - return -EPERM; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - /* - * Map WQ or CQ contig dma memory... - */ - ret = remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - - return ret; -} - -static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - - php = to_iwch_pd(pd); - rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); -} - -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_pd *php = to_iwch_pd(pd); - struct ib_device *ibdev = pd->device; - u32 pdid; - struct iwch_dev *rhp; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - rhp = (struct iwch_dev *) ibdev; - pdid = cxio_hal_get_pdid(rhp->rdev.rscp); - if (!pdid) - return -EINVAL; - - php->pdid = pdid; - php->rhp = rhp; - if (udata) { - struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; - - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd, udata); - return -EFAULT; - } - } - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return 0; -} - -static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_mr *mhp; - u32 mmid; - - pr_debug("%s ib_mr %p\n", __func__, ib_mr); - - mhp = to_iwch_mr(ib_mr); - kfree(mhp->pages); - rhp = mhp->rhp; - mmid = mhp->attr.stag >> 8; - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - iwch_free_pbl(mhp); - xa_erase_irq(&rhp->mrs, mmid); - if (mhp->kva) - kfree((void *) (unsigned long) mhp->kva); - ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - const u64 total_size = 0xffffffff; - const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; - struct iwch_pd *php = to_iwch_pd(pd); - struct iwch_dev *rhp = php->rhp; - struct iwch_mr *mhp; - __be64 *page_list; - int shift = 26, npages, ret, i; - - pr_debug("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. - */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once("Cannot support dma_mrs on this platform\n"); - return ERR_PTR(-ENOTSUPP); - } - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - npages = (total_size + (1ULL << shift) - 1) >> shift; - if (!npages) { - ret = -EINVAL; - goto err; - } - - page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); - if (!page_list) { - ret = -ENOMEM; - goto err; - } - - for (i = 0; i < npages; i++) - page_list[i] = cpu_to_be64((u64)i << shift); - - pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, mask, shift, total_size, npages); - - ret = iwch_alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err_pbl; - } - - ret = iwch_write_pbl(mhp, page_list, npages, 0); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = 0; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift); - if (ret) - goto err_pbl; - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - kfree(mhp); - return ERR_PTR(ret); -} - -static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - __be64 *pages; - int shift, n, i; - int err = 0; - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - struct iwch_reg_user_mr_resp uresp; - struct sg_dma_page_iter sg_iter; - pr_debug("%s ib_pd %p\n", __func__, pd); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - mhp->umem = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree(mhp); - return ERR_PTR(err); - } - - shift = PAGE_SHIFT; - - n = ib_umem_num_pages(mhp->umem); - - err = iwch_alloc_pbl(mhp, n); - if (err) - goto err; - - pages = (__be64 *) __get_free_page(GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err_pbl; - } - - i = n = 0; - - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof(*pages)) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } - - if (i) - err = iwch_write_pbl(mhp, pages, i, n); - -pbl_done: - free_page((unsigned long) pages); - if (err) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = virt; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; - - err = iwch_register_mem(rhp, php, mhp, shift); - if (err) - goto err_pbl; - - if (udata && !t3a_device(rhp)) { - uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - pr_debug("%s user resp pbl_addr 0x%x\n", __func__, - uresp.pbl_addr); - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - iwch_dereg_mr(&mhp->ibmr, udata); - err = -EFAULT; - goto err; - } - } - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - ib_umem_release(mhp->umem); - kfree(mhp); - return ERR_PTR(err); -} - -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); - } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmw); -} - -static int iwch_dealloc_mw(struct ib_mw *mw) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - u32 mmid; - - mhp = to_iwch_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - xa_erase_irq(&rhp->mrs, mmid); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - u32 mmid; - u32 stag = 0; - int ret = -ENOMEM; - - if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > T3_MAX_FASTREG_DEPTH) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - goto err; - - mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) - goto pl_err; - - mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, max_num_sg); - if (ret) - goto err1; - mhp->attr.pbl_size = max_num_sg; - ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - goto err2; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_NON_SHARED_MR; - mhp->attr.stag = stag; - mhp->attr.state = 1; - mmid = (stag) >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); - if (ret) - goto err3; - - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmr); -err3: - cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); -err2: - iwch_free_pbl(mhp); -err1: - kfree(mhp->pages); -pl_err: - kfree(mhp); -err: - return ERR_PTR(ret); -} - -static int iwch_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - if (unlikely(mhp->npages == mhp->attr.pbl_size)) - return -ENOMEM; - - mhp->pages[mhp->npages++] = addr; - - return 0; -} - -static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - mhp->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); -} - -static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_qp_attributes attrs; - struct iwch_ucontext *ucontext; - - qhp = to_iwch_qp(ib_qp); - rhp = qhp->rhp; - - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - wait_event(qhp->wait, !qhp->ep); - - xa_erase_irq(&rhp->qps, qhp->wq.qpid); - - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - - pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, - ib_qp, qhp->wq.qpid, qhp); - kfree(qhp); - return 0; -} - -static struct ib_qp *iwch_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_pd *php; - struct iwch_cq *schp; - struct iwch_cq *rchp; - struct iwch_create_qp_resp uresp; - int wqsize, sqsize, rqsize; - struct iwch_ucontext *ucontext; - - pr_debug("%s ib_pd %p\n", __func__, pd); - if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - php = to_iwch_pd(pd); - rhp = php->rhp; - schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); - rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); - if (!schp || !rchp) - return ERR_PTR(-EINVAL); - - /* The RQT size must be # of entries + 1 rounded up to a power of two */ - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (rqsize == attrs->cap.max_recv_wr) - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); - - /* T3 doesn't support RQT depth < 16 */ - if (rqsize < 16) - rqsize = 16; - - if (rqsize > T3_MAX_RQ_SIZE) - return ERR_PTR(-EINVAL); - - if (attrs->cap.max_inline_data > T3_MAX_INLINE) - return ERR_PTR(-EINVAL); - - /* - * NOTE: The SQ and total WQ sizes don't need to be - * a power of two. However, all the code assumes - * they are. EG: Q_FREECNT() and friends. - */ - sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); - wqsize = roundup_pow_of_two(rqsize + sqsize); - - /* - * Kernel users need more wq space for fastreg WRs which can take - * 2 WR fragments. - */ - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - if (!ucontext && wqsize < (rqsize + (2 * sqsize))) - wqsize = roundup_pow_of_two(rqsize + - roundup_pow_of_two(attrs->cap.max_send_wr * 2)); - pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__, - wqsize, sqsize, rqsize); - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); - if (!qhp) - return ERR_PTR(-ENOMEM); - qhp->wq.size_log2 = ilog2(wqsize); - qhp->wq.rq_size_log2 = ilog2(rqsize); - qhp->wq.sq_size_log2 = ilog2(sqsize); - if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - attrs->cap.max_recv_wr = rqsize - 1; - attrs->cap.max_send_wr = sqsize; - attrs->cap.max_inline_data = T3_MAX_INLINE; - - qhp->rhp = rhp; - qhp->attr.pd = php->pdid; - qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; - qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; - qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; - qhp->attr.sq_max_sges = attrs->cap.max_send_sge; - qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.next_state = IWCH_QP_STATE_IDLE; - - /* - * XXX - These don't get passed in from the openib user - * at create time. The CM sets them via a QP modify. - * Need to fix... I think the CM should - */ - qhp->attr.enable_rdma_read = 1; - qhp->attr.enable_rdma_write = 1; - qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; - - spin_lock_init(&qhp->lock); - init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); - - if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - if (udata) { - - struct iwch_mm_entry *mm1, *mm2; - - mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); - if (!mm1) { - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); - if (!mm2) { - kfree(mm1); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - uresp.qpid = qhp->wq.qpid; - uresp.size_log2 = qhp->wq.size_log2; - uresp.sq_size_log2 = qhp->wq.sq_size_log2; - uresp.rq_size_log2 = qhp->wq.rq_size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - uresp.db_key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(mm1); - kfree(mm2); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-EFAULT); - } - mm1->key = uresp.key; - mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); - insert_mmap(ucontext, mm1); - mm2->key = uresp.db_key; - mm2->addr = qhp->wq.udb & PAGE_MASK; - mm2->len = PAGE_SIZE; - insert_mmap(ucontext, mm2); - } - qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug( - "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, - qhp->wq.rq_addr); - return &qhp->ibqp; -} - -static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs = {}; - - pr_debug("%s ib_qp %p\n", __func__, ibqp); - - /* iwarp does not support the RTR state */ - if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) - attr_mask &= ~IB_QP_STATE; - - /* Make sure we still have something left to do */ - if (!attr_mask) - return 0; - - qhp = to_iwch_qp(ibqp); - rhp = qhp->rhp; - - attrs.next_state = iwch_convert_state(attr->qp_state); - attrs.enable_rdma_read = (attr->qp_access_flags & - IB_ACCESS_REMOTE_READ) ? 1 : 0; - attrs.enable_rdma_write = (attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; - - - mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; - mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? - (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; - - return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); -} - -void iwch_qp_add_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_iwch_qp(qp)->refcnt)); -} - -void iwch_qp_rem_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) - wake_up(&(to_iwch_qp(qp)->wait)); -} - -static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) -{ - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); - return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); -} - - -static int iwch_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - *pkey = 0; - return 0; -} - -static int iwch_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct iwch_dev *dev; - - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); - dev = to_iwch_dev(ibdev); - BUG_ON(port == 0 || port > 2); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); - return 0; -} - -static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min, fw_mic; - - lldev->ethtool_ops->get_drvinfo(lldev, &info); - - next = info.fw_version + 1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_mic); - - return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | - (fw_mic & 0xffff); -} - -static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - - struct iwch_dev *dev; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - dev = to_iwch_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - props->hw_ver = dev->rdev.t3cdev_p->type; - props->fw_ver = fw_vers_string_to_u64(dev); - props->device_cap_flags = dev->device_cap_flags; - props->page_size_cap = dev->attr.mem_pgsizes_bitmask; - props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; - props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = dev->attr.max_mr_size; - props->max_qp = dev->attr.max_qps; - props->max_qp_wr = dev->attr.max_wrs; - props->max_send_sge = dev->attr.max_sge_per_wr; - props->max_recv_sge = dev->attr.max_sge_per_wr; - props->max_sge_rd = 1; - props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_cq = dev->attr.max_cqs; - props->max_cqe = dev->attr.max_cqes_per_cq; - props->max_mr = dev->attr.max_mem_regs; - props->max_pd = dev->attr.max_pds; - props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; - - return 0; -} - -static int iwch_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; - props->max_msg_sz = -1; - - return 0; -} - -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); -} -static DEVICE_ATTR_RO(hw_rev); - -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); -} -static DEVICE_ATTR_RO(hca_type); - -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, - iwch_dev->rdev.rnic_info.pdev->device); -} -static DEVICE_ATTR_RO(board_id); - -enum counters { - IPINRECEIVES, - IPINHDRERRORS, - IPINADDRERRORS, - IPINUNKNOWNPROTOS, - IPINDISCARDS, - IPINDELIVERS, - IPOUTREQUESTS, - IPOUTDISCARDS, - IPOUTNOROUTES, - IPREASMTIMEOUT, - IPREASMREQDS, - IPREASMOKS, - IPREASMFAILS, - TCPACTIVEOPENS, - TCPPASSIVEOPENS, - TCPATTEMPTFAILS, - TCPESTABRESETS, - TCPCURRESTAB, - TCPINSEGS, - TCPOUTSEGS, - TCPRETRANSSEGS, - TCPINERRS, - TCPOUTRSTS, - TCPRTOMIN, - TCPRTOMAX, - NR_COUNTERS -}; - -static const char * const names[] = { - [IPINRECEIVES] = "ipInReceives", - [IPINHDRERRORS] = "ipInHdrErrors", - [IPINADDRERRORS] = "ipInAddrErrors", - [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", - [IPINDISCARDS] = "ipInDiscards", - [IPINDELIVERS] = "ipInDelivers", - [IPOUTREQUESTS] = "ipOutRequests", - [IPOUTDISCARDS] = "ipOutDiscards", - [IPOUTNOROUTES] = "ipOutNoRoutes", - [IPREASMTIMEOUT] = "ipReasmTimeout", - [IPREASMREQDS] = "ipReasmReqds", - [IPREASMOKS] = "ipReasmOKs", - [IPREASMFAILS] = "ipReasmFails", - [TCPACTIVEOPENS] = "tcpActiveOpens", - [TCPPASSIVEOPENS] = "tcpPassiveOpens", - [TCPATTEMPTFAILS] = "tcpAttemptFails", - [TCPESTABRESETS] = "tcpEstabResets", - [TCPCURRESTAB] = "tcpCurrEstab", - [TCPINSEGS] = "tcpInSegs", - [TCPOUTSEGS] = "tcpOutSegs", - [TCPRETRANSSEGS] = "tcpRetransSegs", - [TCPINERRS] = "tcpInErrs", - [TCPOUTRSTS] = "tcpOutRsts", - [TCPRTOMIN] = "tcpRtoMin", - [TCPRTOMAX] = "tcpRtoMax", -}; - -static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, - u8 port_num) -{ - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); - - /* Our driver only supports device level stats */ - if (port_num != 0) - return NULL; - - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) -{ - struct iwch_dev *dev; - struct tp_mib_stats m; - int ret; - - if (port != 0 || !stats) - return -ENOSYS; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); - if (ret) - return -ENOSYS; - - stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; - stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; - stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; - stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; - stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; - stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; - stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo; - stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; - stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; - stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; - stats->value[IPREASMREQDS] = m.ipReasmReqds; - stats->value[IPREASMOKS] = m.ipReasmOKs; - stats->value[IPREASMFAILS] = m.ipReasmFails; - stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; - stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; - stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; - stats->value[TCPESTABRESETS] = m.tcpEstabResets; - stats->value[TCPCURRESTAB] = m.tcpOutRsts; - stats->value[TCPINSEGS] = m.tcpCurrEstab; - stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; - stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; - stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, - stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; - stats->value[TCPRTOMIN] = m.tcpRtoMin; - stats->value[TCPRTOMAX] = m.tcpRtoMax; - - return stats->num_counters; -} - -static struct attribute *iwch_class_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group iwch_attr_group = { - .attrs = iwch_class_attributes, -}; - -static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) -{ - struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, iwch_dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); -} - -static const struct ib_device_ops iwch_dev_ops = { - .owner = THIS_MODULE, - .driver_id = RDMA_DRIVER_CXGB3, - .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, - .uverbs_no_driver_id_binding = 1, - - .alloc_hw_stats = iwch_alloc_stats, - .alloc_mr = iwch_alloc_mr, - .alloc_mw = iwch_alloc_mw, - .alloc_pd = iwch_allocate_pd, - .alloc_ucontext = iwch_alloc_ucontext, - .create_cq = iwch_create_cq, - .create_qp = iwch_create_qp, - .dealloc_mw = iwch_dealloc_mw, - .dealloc_pd = iwch_deallocate_pd, - .dealloc_ucontext = iwch_dealloc_ucontext, - .dereg_mr = iwch_dereg_mr, - .destroy_cq = iwch_destroy_cq, - .destroy_qp = iwch_destroy_qp, - .get_dev_fw_str = get_dev_fw_ver_str, - .get_dma_mr = iwch_get_dma_mr, - .get_hw_stats = iwch_get_mib, - .get_port_immutable = iwch_port_immutable, - .iw_accept = iwch_accept_cr, - .iw_add_ref = iwch_qp_add_ref, - .iw_connect = iwch_connect, - .iw_create_listen = iwch_create_listen, - .iw_destroy_listen = iwch_destroy_listen, - .iw_get_qp = iwch_get_qp, - .iw_reject = iwch_reject_cr, - .iw_rem_ref = iwch_qp_rem_ref, - .map_mr_sg = iwch_map_mr_sg, - .mmap = iwch_mmap, - .modify_qp = iwch_ib_modify_qp, - .poll_cq = iwch_poll_cq, - .post_recv = iwch_post_receive, - .post_send = iwch_post_send, - .query_device = iwch_query_device, - .query_gid = iwch_query_gid, - .query_pkey = iwch_query_pkey, - .query_port = iwch_query_port, - .reg_user_mr = iwch_reg_user_mr, - .req_notify_cq = iwch_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), - INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), -}; - -static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) -{ - int ret; - int i; - - for (i = 0; i < rdev->port_info.nports; i++) { - ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], - i + 1); - if (ret) - return ret; - } - return 0; -} - -int iwch_register_device(struct iwch_dev *dev) -{ - int err; - - pr_debug("%s iwch_dev %p\n", __func__, dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - /* cxgb3 supports STag 0. */ - dev->ibdev.local_dma_lkey = 0; - - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - dev->ibdev.node_type = RDMA_NODE_RNIC; - BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); - memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); - dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - - memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iw_ifname)); - - rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); - ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - err = set_netdevs(&dev->ibdev, &dev->rdev); - if (err) - return err; - - return ib_register_device(&dev->ibdev, "cxgb3_%d"); -} - -void iwch_unregister_device(struct iwch_dev *dev) -{ - pr_debug("%s iwch_dev %p\n", __func__, dev); - ib_unregister_device(&dev->ibdev); - return; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h deleted file mode 100644 index 8adbe9658935..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_PROVIDER_H__ -#define __IWCH_PROVIDER_H__ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <rdma/ib_verbs.h> -#include <asm/types.h> -#include "t3cdev.h" -#include "iwch.h" -#include "cxio_wr.h" -#include "cxio_hal.h" - -struct iwch_pd { - struct ib_pd ibpd; - u32 pdid; - struct iwch_dev *rhp; -}; - -static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct iwch_pd, ibpd); -} - -struct tpt_attributes { - u32 stag; - u32 state:1; - u32 type:2; - u32 rsvd:1; - enum tpt_mem_perm perms; - u32 remote_invaliate_disable:1; - u32 zbva:1; - u32 mw_bind_enable:1; - u32 page_size:5; - - u32 pdid; - u32 qpid; - u32 pbl_addr; - u32 len; - u64 va_fbo; - u32 pbl_size; -}; - -struct iwch_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; - u64 *pages; - u32 npages; -}; - -typedef struct iwch_mw iwch_mw_handle; - -static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct iwch_mr, ibmr); -} - -struct iwch_mw { - struct ib_mw ibmw; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; -}; - -static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct iwch_mw, ibmw); -} - -struct iwch_cq { - struct ib_cq ibcq; - struct iwch_dev *rhp; - struct t3_cq cq; - spinlock_t lock; - spinlock_t comp_handler_lock; - atomic_t refcnt; - wait_queue_head_t wait; - u32 __user *user_rptr_addr; -}; - -static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct iwch_cq, ibcq); -} - -enum IWCH_QP_FLAGS { - QP_QUIESCED = 0x01 -}; - -struct iwch_mpa_attributes { - u8 initiator; - u8 recv_marker_enabled; - u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ - u8 crc_enabled; - u8 version; /* 0 or 1 */ -}; - -struct iwch_qp_attributes { - u32 scq; - u32 rcq; - u32 sq_num_entries; - u32 rq_num_entries; - u32 sq_max_sges; - u32 sq_max_sges_rdma_write; - u32 rq_max_sges; - u32 state; - u8 enable_rdma_read; - u8 enable_rdma_write; /* enable inbound Read Resp. */ - u8 enable_bind; - u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ - /* - * Next QP state. If specify the current state, only the - * QP attributes will be modified. - */ - u32 max_ord; - u32 max_ird; - u32 pd; /* IN */ - u32 next_state; - char terminate_buffer[52]; - u32 terminate_msg_len; - u8 is_terminate_local; - struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ - struct iwch_ep *llp_stream_handle; - char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ - u32 stream_msg_buf_len; /* Only on Idle -> RTS */ -}; - -struct iwch_qp { - struct ib_qp ibqp; - struct iwch_dev *rhp; - struct iwch_ep *ep; - struct iwch_qp_attributes attr; - struct t3_wq wq; - spinlock_t lock; - atomic_t refcnt; - wait_queue_head_t wait; - enum IWCH_QP_FLAGS flags; -}; - -static inline int qp_quiesced(struct iwch_qp *qhp) -{ - return qhp->flags & QP_QUIESCED; -} - -static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct iwch_qp, ibqp); -} - -void iwch_qp_add_ref(struct ib_qp *qp); -void iwch_qp_rem_ref(struct ib_qp *qp); - -struct iwch_ucontext { - struct ib_ucontext ibucontext; - struct cxio_ucontext uctx; - u32 key; - spinlock_t mmap_lock; - struct list_head mmaps; -}; - -static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) -{ - return container_of(c, struct iwch_ucontext, ibucontext); -} - -struct iwch_mm_entry { - struct list_head entry; - u64 addr; - u32 key; - unsigned len; -}; - -static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, - u32 key, unsigned len) -{ - struct list_head *pos, *nxt; - struct iwch_mm_entry *mm; - - spin_lock(&ucontext->mmap_lock); - list_for_each_safe(pos, nxt, &ucontext->mmaps) { - - mm = list_entry(pos, struct iwch_mm_entry, entry); - if (mm->key == key && mm->len == len) { - list_del_init(&mm->entry); - spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, - (unsigned long long)mm->addr, mm->len); - return mm; - } - } - spin_unlock(&ucontext->mmap_lock); - return NULL; -} - -static inline void insert_mmap(struct iwch_ucontext *ucontext, - struct iwch_mm_entry *mm) -{ - spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); - list_add_tail(&mm->entry, &ucontext->mmaps); - spin_unlock(&ucontext->mmap_lock); -} - -enum iwch_qp_attr_mask { - IWCH_QP_ATTR_NEXT_STATE = 1 << 0, - IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, - IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, - IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, - IWCH_QP_ATTR_MAX_ORD = 1 << 11, - IWCH_QP_ATTR_MAX_IRD = 1 << 12, - IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, - IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, - IWCH_QP_ATTR_MPA_ATTR = 1 << 24, - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, - IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_MAX_ORD | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_STREAM_MSG_BUFFER | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) -}; - -int iwch_modify_qp(struct iwch_dev *rhp, - struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal); - -enum iwch_qp_state { - IWCH_QP_STATE_IDLE, - IWCH_QP_STATE_RTS, - IWCH_QP_STATE_ERROR, - IWCH_QP_STATE_TERMINATE, - IWCH_QP_STATE_CLOSING, - IWCH_QP_STATE_TOT -}; - -static inline int iwch_convert_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - case IB_QPS_INIT: - return IWCH_QP_STATE_IDLE; - case IB_QPS_RTS: - return IWCH_QP_STATE_RTS; - case IB_QPS_SQD: - return IWCH_QP_STATE_CLOSING; - case IB_QPS_SQE: - return IWCH_QP_STATE_TERMINATE; - case IB_QPS_ERR: - return IWCH_QP_STATE_ERROR; - default: - return -1; - } -} - -static inline u32 iwch_ib_to_tpt_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | - TPT_LOCAL_READ; -} - -static inline u32 iwch_ib_to_tpt_bind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); -} - -enum iwch_mmid_state { - IWCH_STAG_STATE_VALID, - IWCH_STAG_STATE_INVALID -}; - -enum iwch_qp_query_flags { - IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ - IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ - IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ - - /* - * Quiesce QP context; Consumer - * will NOT replay outstanding WR - */ - IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, - IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, - IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ -}; - -u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_ep *ep); -int iwch_register_device(struct iwch_dev *dev); -void iwch_unregister_device(struct iwch_dev *dev); -void stop_read_rep_timer(struct iwch_qp *qhp); -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift); -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); -void iwch_free_pbl(struct iwch_mr *mhp); -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); - -#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c deleted file mode 100644 index c649faad63f9..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/sched.h> -#include <linux/gfp.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_resource.h" - -#define NO_SUPPORT -1 - -static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - - switch (wr->opcode) { - case IB_WR_SEND: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE; - else - wqe->send.rdmaop = T3_SEND; - wqe->send.rem_stag = 0; - break; - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - else - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); - break; - default: - return -EINVAL; - } - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->send.reserved[0] = 0; - wqe->send.reserved[1] = 0; - wqe->send.reserved[2] = 0; - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); - wqe->send.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->write.rdmaop = T3_RDMA_WRITE; - wqe->write.reserved[0] = 0; - wqe->write.reserved[1] = 0; - wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - - if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - plen = 4; - wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = cpu_to_be32(0); - wqe->write.num_sgle = cpu_to_be32(0); - *flit_cnt = 6; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->write.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.sgl[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 5 + ((wr->num_sge) << 1); - } - wqe->write.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - if (wr->num_sge > 1) - return -EINVAL; - wqe->read.rdmaop = T3_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - wqe->read.local_inv = 1; - else - wqe->read.local_inv = 0; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); - wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); - wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); - wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); - *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - return 0; -} - -static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) -{ - struct iwch_mr *mhp = to_iwch_mr(wr->mr); - int i; - __be64 *p; - - if (mhp->npages > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->key); - wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); - wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(mhp->npages) | - V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < mhp->npages; i++, p++) { - - /* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)mhp->pages[i]); - } - *flit_cnt = 5 + mhp->npages; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - -static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); - wqe->local_inv.reserved = 0; - *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; - return 0; -} - -static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, - u32 num_sgle, u32 * pbl_addr, u8 * page_size) -{ - int i; - struct iwch_mr *mhp; - u64 offset; - for (i = 0; i < num_sgle; i++) { - - mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); - if (!mhp) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (!mhp->attr.state) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (mhp->attr.zbva) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - - if (sg_list[i].addr < mhp->attr.va_fbo) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) < - sg_list[i].addr) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) > - mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += mhp->attr.va_fbo & - ((1UL << (12 + mhp->attr.page_size)) - 1); - pbl_addr[i] = ((mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3) + - (offset >> (12 + mhp->attr.page_size)); - page_size[i] = mhp->attr.page_size; - } - return 0; -} - -static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i, err = 0; - u32 pbl_addr[T3_MAX_SGE]; - u8 page_size[T3_MAX_SGE]; - - err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - for (i = 0; i < wr->num_sge; i++) { - wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & - ((1UL << (12 + page_size[i])) - 1)); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = 0; - return 0; -} - -static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i; - u32 pbl_addr; - u32 pbl_offset; - - - /* - * The T3 HW requires the PBL in the HW recv descriptor to reference - * a PBL entry. So we allocate the max needed PBL memory here and pass - * it to the uP in the recv WR. The uP will build the PBL and setup - * the HW recv descriptor. - */ - pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); - if (!pbl_addr) - return -ENOMEM; - - /* - * Compute the 8B aligned offset. - */ - pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; - - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - - for (i = 0; i < wr->num_sge; i++) { - - /* - * Use a 128MB page size. This and an imposed 128MB - * sge length limit allows us to require only a 2-entry HW - * PBL for each SGE. This restriction is acceptable since - * since it is not possible to allocate 128MB of contiguous - * DMA coherent memory! - */ - if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) - return -EINVAL; - wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; - - /* - * T3 restricts a recv to all zero-stag or all non-zero-stag. - */ - if (wr->sg_list[i].lkey != 0) - return -EINVAL; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); - pbl_offset += 2; - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.pagesz[i] = 0; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; - return 0; -} - -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - int err = 0; - u8 uninitialized_var(t3_wr_flit_cnt); - enum t3_wr_opcode t3_wr_opcode = 0; - enum t3_wr_flags t3_wr_flags; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - struct t3_swsq *sqp; - int wr_cnt = 1; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (num_wrs == 0) { - err = -ENOMEM; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - t3_wr_flags = 0; - if (wr->send_flags & IB_SEND_SOLICITED) - t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) - t3_wr_flags |= T3_COMPLETION_FLAG; - sqp = qhp->wq.sq + - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - switch (wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; - t3_wr_opcode = T3_WR_SEND; - err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - t3_wr_opcode = T3_WR_WRITE; - err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - t3_wr_opcode = T3_WR_READ; - t3_wr_flags = 0; /* T3 reads are always signaled */ - err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); - if (err) - break; - sqp->read_len = wqe->read.local_len; - if (!qhp->wq.oldest_read) - qhp->wq.oldest_read = sqp; - break; - case IB_WR_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; - case IB_WR_LOCAL_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_LOCAL_FENCE_FLAG; - t3_wr_opcode = T3_WR_INV_STAG; - err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); - break; - default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; - } - if (err) - break; - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp->wr_id = wr->wr_id; - sqp->opcode = wr2opcode(t3_wr_opcode); - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); - - build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt, - (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); - pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __func__, (unsigned long long)wr->wr_id, idx, - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), - sqp->opcode); - wr = wr->next; - num_wrs--; - qhp->wq.wptr += wr_cnt; - ++(qhp->wq.sq_wptr); - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - int err = 0; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, - qhp->wq.rq_size_log2) - 1; - if (!wr) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (wr->num_sge > T3_MAX_SGE) { - err = -EINVAL; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - if (num_wrs) - if (wr->sg_list[0].lkey) - err = build_rdma_recv(qhp, wqe, wr); - else - err = build_zero_stag_recv(qhp, wqe, wr); - else - err = -ENOMEM; - - if (err) - break; - - build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); - pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n", - __func__, (unsigned long long)wr->wr_id, - idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); - ++(qhp->wq.rq_wptr); - ++(qhp->wq.wptr); - wr = wr->next; - num_wrs--; - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -static inline void build_term_codes(struct respQ_msg_t *rsp_msg, - u8 *layer_type, u8 *ecode) -{ - int status = TPT_ERR_INTERNAL_ERR; - int tagged = 0; - int opcode = -1; - int rqtype = 0; - int send_inv = 0; - - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - opcode = CQE_OPCODE(rsp_msg->cqe); - rqtype = RQ_TYPE(rsp_msg->cqe); - send_inv = (opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV); - tagged = (opcode == T3_RDMA_WRITE) || - (rqtype && (opcode == T3_READ_RESP)); - } - - switch (status) { - case TPT_ERR_STAG: - if (send_inv) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_INV_STAG; - } - break; - case TPT_ERR_PDID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - if ((opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV)) - *ecode = RDMAP_CANT_INV_STAG; - else - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_QPID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_ACCESS: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_ACC_VIOL; - break; - case TPT_ERR_WRAP: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_TO_WRAP; - break; - case TPT_ERR_BOUND: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_BASE_BOUNDS; - } - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - break; - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_OUT_OF_RQE: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_NOBUF; - break; - case TPT_ERR_PBL_ADDR_BOUND: - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - break; - case TPT_ERR_CRC: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_CRC_ERR; - break; - case TPT_ERR_MARKER: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_MARKER_ERR; - break; - case TPT_ERR_PDU_LEN_ERR: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; - break; - case TPT_ERR_DDP_VERSION: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_VERS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_VERS; - } - break; - case TPT_ERR_RDMA_VERSION: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_VERS; - break; - case TPT_ERR_OPCODE: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_OPCODE; - break; - case TPT_ERR_DDP_QUEUE_NUM: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_QN; - break; - case TPT_ERR_MSN: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_RANGE; - break; - case TPT_ERR_TBIT: - *layer_type = LAYER_DDP|DDP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_MO: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MO; - break; - default: - *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; - *ecode = 0; - break; - } -} - -int iwch_post_zb_read(struct iwch_ep *ep) -{ - union t3_wr *wqe; - struct sk_buff *skb; - u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - - pr_debug("%s enter\n", __func__); - skb = alloc_skb(40, GFP_KERNEL); - if (!skb) { - pr_err("%s cannot send zb_read!!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr)); - wqe->read.rdmaop = T3_READ_REQ; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(1); - wqe->read.rem_to = cpu_to_be64(1); - wqe->read.local_stag = cpu_to_be32(1); - wqe->read.local_len = cpu_to_be32(0); - wqe->read.local_to = cpu_to_be64(1); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| - V_FW_RIWR_LEN(flit_cnt)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); -} - -/* - * This posts a TERMINATE with layer=RDMA, type=catastrophic. - */ -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) -{ - union t3_wr *wqe; - struct terminate_message *term; - struct sk_buff *skb; - - pr_debug("%s %d\n", __func__, __LINE__); - skb = alloc_skb(40, GFP_ATOMIC); - if (!skb) { - pr_err("%s cannot send TERMINATE!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, 40); - wqe->send.rdmaop = T3_TERMINATE; - - /* immediate data length */ - wqe->send.plen = htonl(4); - - /* immediate data starts here. */ - term = (struct terminate_message *)wqe->send.sgl; - build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | - V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); -} - -/* - * Assumes qhp lock is held. - */ -static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp) - __releases(&qhp->lock) - __acquires(&qhp->lock) -{ - int count; - int flushed; - - lockdep_assert_held(&qhp->lock); - - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock(&qhp->lock); - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&rchp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&rchp->cq); - cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&rchp->lock); - if (flushed) { - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - } - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&schp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&schp->cq); - cxio_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&schp->lock); - if (flushed) { - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock(&qhp->lock); -} - -static void flush_qp(struct iwch_qp *qhp) -{ - struct iwch_cq *rchp, *schp; - - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - - if (qhp->ibqp.uobject) { - cxio_set_wq_in_error(&qhp->wq); - cxio_set_cq_in_error(&rchp->cq); - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - if (schp != rchp) { - cxio_set_cq_in_error(&schp->cq); - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - return; - } - __flush_qp(qhp, rchp, schp); -} - - -/* - * Return count of RECV WRs posted - */ -u16 iwch_rqes_posted(struct iwch_qp *qhp) -{ - union t3_wr *wqe = qhp->wq.queue; - u16 count = 0; - - while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { - count++; - wqe++; - } - pr_debug("%s qhp %p count %u\n", __func__, qhp, count); - return count; -} - -static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs) -{ - struct t3_rdma_init_attr init_attr; - int ret; - - init_attr.tid = qhp->ep->hwtid; - init_attr.qpid = qhp->wq.qpid; - init_attr.pdid = qhp->attr.pd; - init_attr.scqid = qhp->attr.scq; - init_attr.rcqid = qhp->attr.rcq; - init_attr.rq_addr = qhp->wq.rq_addr; - init_attr.rq_size = 1 << qhp->wq.rq_size_log2; - init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | - qhp->attr.mpa_attr.recv_marker_enabled | - (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | - (qhp->attr.mpa_attr.crc_enabled << 2); - - init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | - uP_RI_QP_RDMA_WRITE_ENABLE | - uP_RI_QP_BIND_ENABLE; - if (!qhp->ibqp.uobject) - init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | - uP_RI_QP_FAST_REGISTER_ENABLE; - - init_attr.tcp_emss = qhp->ep->emss; - init_attr.ord = qhp->attr.max_ord; - init_attr.ird = qhp->attr.max_ird; - init_attr.qp_dma_addr = qhp->wq.dma_addr; - init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.rqe_count = iwch_rqes_posted(qhp); - init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - init_attr.chan = qhp->ep->l2t->smt_idx; - if (peer2peer) { - init_attr.rtr_type = RTR_READ; - if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) - init_attr.ord = 1; - if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) - init_attr.ird = 1; - } else - init_attr.rtr_type = 0; - init_attr.irs = qhp->ep->rcv_seq; - pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n", - __func__, - init_attr.rq_addr, init_attr.rq_size, - init_attr.flags, init_attr.qpcaps); - ret = cxio_rdma_init(&rhp->rdev, &init_attr); - pr_debug("%s ret %d\n", __func__, ret); - return ret; -} - -int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal) -{ - int ret = 0; - struct iwch_qp_attributes newattr = qhp->attr; - unsigned long flag; - int disconnect = 0; - int terminate = 0; - int abort = 0; - int free = 0; - struct iwch_ep *ep = NULL; - - pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, - qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, - (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); - - spin_lock_irqsave(&qhp->lock, flag); - - /* Process attr changes if in IDLE */ - if (mask & IWCH_QP_ATTR_VALID_MODIFY) { - if (qhp->attr.state != IWCH_QP_STATE_IDLE) { - ret = -EIO; - goto out; - } - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) - newattr.enable_rdma_read = attrs->enable_rdma_read; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) - newattr.enable_rdma_write = attrs->enable_rdma_write; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) - newattr.enable_bind = attrs->enable_bind; - if (mask & IWCH_QP_ATTR_MAX_ORD) { - if (attrs->max_ord > - rhp->attr.max_rdma_read_qp_depth) { - ret = -EINVAL; - goto out; - } - newattr.max_ord = attrs->max_ord; - } - if (mask & IWCH_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > - rhp->attr.max_rdma_reads_per_qp) { - ret = -EINVAL; - goto out; - } - newattr.max_ird = attrs->max_ird; - } - qhp->attr = newattr; - } - - if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) - goto out; - if (qhp->attr.state == attrs->next_state) - goto out; - - switch (qhp->attr.state) { - case IWCH_QP_STATE_IDLE: - switch (attrs->next_state) { - case IWCH_QP_STATE_RTS: - if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { - ret = -EINVAL; - goto out; - } - if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { - ret = -EINVAL; - goto out; - } - qhp->attr.mpa_attr = attrs->mpa_attr; - qhp->attr.llp_stream_handle = attrs->llp_stream_handle; - qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = IWCH_QP_STATE_RTS; - - /* - * Ref the endpoint here and deref when we - * disassociate the endpoint from the QP. This - * happens in CLOSING->IDLE transition or *->ERROR - * transition. - */ - get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_init(rhp, qhp, mask, attrs); - spin_lock_irqsave(&qhp->lock, flag); - if (ret) - goto err; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp); - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_RTS: - switch (attrs->next_state) { - case IWCH_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); - qhp->attr.state = IWCH_QP_STATE_CLOSING; - if (!internal) { - abort=0; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - break; - case IWCH_QP_STATE_TERMINATE: - qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (qhp->ibqp.uobject) - cxio_set_wq_in_error(&qhp->wq); - if (!internal) - terminate = 1; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - if (!internal) { - abort=1; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - goto err; - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_CLOSING: - if (!internal) { - ret = -EINVAL; - goto out; - } - switch (attrs->next_state) { - case IWCH_QP_STATE_IDLE: - flush_qp(qhp); - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.llp_stream_handle = NULL; - put_ep(&qhp->ep->com); - qhp->ep = NULL; - wake_up(&qhp->wait); - break; - case IWCH_QP_STATE_ERROR: - goto err; - default: - ret = -EINVAL; - goto err; - } - break; - case IWCH_QP_STATE_ERROR: - if (attrs->next_state != IWCH_QP_STATE_IDLE) { - ret = -EINVAL; - goto out; - } - - if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || - !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { - ret = -EINVAL; - goto out; - } - qhp->attr.state = IWCH_QP_STATE_IDLE; - break; - case IWCH_QP_STATE_TERMINATE: - if (!internal) { - ret = -EINVAL; - goto out; - } - goto err; - break; - default: - pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); - ret = -EINVAL; - goto err; - break; - } - goto out; -err: - pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, - qhp->wq.qpid); - - /* disassociate the LLP connection */ - qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; - qhp->ep = NULL; - qhp->attr.state = IWCH_QP_STATE_ERROR; - free=1; - wake_up(&qhp->wait); - BUG_ON(!ep); - flush_qp(qhp); -out: - spin_unlock_irqrestore(&qhp->lock, flag); - - if (terminate) - iwch_post_terminate(qhp, NULL); - - /* - * If disconnect is 1, then we need to initiate a disconnect - * on the EP. This can be a normal close (RTS->CLOSING) or - * an abnormal close (RTS/CLOSING->ERROR). - */ - if (disconnect) { - iwch_ep_disconnect(ep, abort, GFP_KERNEL); - put_ep(&ep->com); - } - - /* - * If free is 1, then we've disassociated the EP from the QP - * and we need to dereference the EP. - */ - if (free) - put_ep(&ep->com); - - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h deleted file mode 100644 index c702dc199e18..000000000000 --- a/drivers/infiniband/hw/cxgb3/tcb.h +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2007 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _TCB_DEFS_H -#define _TCB_DEFS_H - -#define W_TCB_T_STATE 0 -#define S_TCB_T_STATE 0 -#define M_TCB_T_STATE 0xfULL -#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) - -#define W_TCB_TIMER 0 -#define S_TCB_TIMER 4 -#define M_TCB_TIMER 0x1ULL -#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) - -#define W_TCB_DACK_TIMER 0 -#define S_TCB_DACK_TIMER 5 -#define M_TCB_DACK_TIMER 0x1ULL -#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) - -#define W_TCB_DEL_FLAG 0 -#define S_TCB_DEL_FLAG 6 -#define M_TCB_DEL_FLAG 0x1ULL -#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) - -#define W_TCB_L2T_IX 0 -#define S_TCB_L2T_IX 7 -#define M_TCB_L2T_IX 0x7ffULL -#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) - -#define W_TCB_SMAC_SEL 0 -#define S_TCB_SMAC_SEL 18 -#define M_TCB_SMAC_SEL 0x3ULL -#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) - -#define W_TCB_TOS 0 -#define S_TCB_TOS 20 -#define M_TCB_TOS 0x3fULL -#define V_TCB_TOS(x) ((x) << S_TCB_TOS) - -#define W_TCB_MAX_RT 0 -#define S_TCB_MAX_RT 26 -#define M_TCB_MAX_RT 0xfULL -#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) - -#define W_TCB_T_RXTSHIFT 0 -#define S_TCB_T_RXTSHIFT 30 -#define M_TCB_T_RXTSHIFT 0xfULL -#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) - -#define W_TCB_T_DUPACKS 1 -#define S_TCB_T_DUPACKS 2 -#define M_TCB_T_DUPACKS 0xfULL -#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) - -#define W_TCB_T_MAXSEG 1 -#define S_TCB_T_MAXSEG 6 -#define M_TCB_T_MAXSEG 0xfULL -#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG) - -#define W_TCB_T_FLAGS1 1 -#define S_TCB_T_FLAGS1 10 -#define M_TCB_T_FLAGS1 0xffffffffULL -#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) - -#define W_TCB_T_MIGRATION 1 -#define S_TCB_T_MIGRATION 20 -#define M_TCB_T_MIGRATION 0x1ULL -#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) - -#define W_TCB_T_FLAGS2 2 -#define S_TCB_T_FLAGS2 10 -#define M_TCB_T_FLAGS2 0x7fULL -#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) - -#define W_TCB_SND_SCALE 2 -#define S_TCB_SND_SCALE 17 -#define M_TCB_SND_SCALE 0xfULL -#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) - -#define W_TCB_RCV_SCALE 2 -#define S_TCB_RCV_SCALE 21 -#define M_TCB_RCV_SCALE 0xfULL -#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) - -#define W_TCB_SND_UNA_RAW 2 -#define S_TCB_SND_UNA_RAW 25 -#define M_TCB_SND_UNA_RAW 0x7ffffffULL -#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) - -#define W_TCB_SND_NXT_RAW 3 -#define S_TCB_SND_NXT_RAW 20 -#define M_TCB_SND_NXT_RAW 0x7ffffffULL -#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) - -#define W_TCB_RCV_NXT 4 -#define S_TCB_RCV_NXT 15 -#define M_TCB_RCV_NXT 0xffffffffULL -#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) - -#define W_TCB_RCV_ADV 5 -#define S_TCB_RCV_ADV 15 -#define M_TCB_RCV_ADV 0xffffULL -#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) - -#define W_TCB_SND_MAX_RAW 5 -#define S_TCB_SND_MAX_RAW 31 -#define M_TCB_SND_MAX_RAW 0x7ffffffULL -#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) - -#define W_TCB_SND_CWND 6 -#define S_TCB_SND_CWND 26 -#define M_TCB_SND_CWND 0x7ffffffULL -#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) - -#define W_TCB_SND_SSTHRESH 7 -#define S_TCB_SND_SSTHRESH 21 -#define M_TCB_SND_SSTHRESH 0x7ffffffULL -#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) - -#define W_TCB_T_RTT_TS_RECENT_AGE 8 -#define S_TCB_T_RTT_TS_RECENT_AGE 16 -#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL -#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) - -#define W_TCB_T_RTSEQ_RECENT 9 -#define S_TCB_T_RTSEQ_RECENT 16 -#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL -#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) - -#define W_TCB_T_SRTT 10 -#define S_TCB_T_SRTT 16 -#define M_TCB_T_SRTT 0xffffULL -#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) - -#define W_TCB_T_RTTVAR 11 -#define S_TCB_T_RTTVAR 0 -#define M_TCB_T_RTTVAR 0xffffULL -#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) - -#define W_TCB_TS_LAST_ACK_SENT_RAW 11 -#define S_TCB_TS_LAST_ACK_SENT_RAW 16 -#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL -#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) - -#define W_TCB_DIP 12 -#define S_TCB_DIP 11 -#define M_TCB_DIP 0xffffffffULL -#define V_TCB_DIP(x) ((x) << S_TCB_DIP) - -#define W_TCB_SIP 13 -#define S_TCB_SIP 11 -#define M_TCB_SIP 0xffffffffULL -#define V_TCB_SIP(x) ((x) << S_TCB_SIP) - -#define W_TCB_DP 14 -#define S_TCB_DP 11 -#define M_TCB_DP 0xffffULL -#define V_TCB_DP(x) ((x) << S_TCB_DP) - -#define W_TCB_SP 14 -#define S_TCB_SP 27 -#define M_TCB_SP 0xffffULL -#define V_TCB_SP(x) ((x) << S_TCB_SP) - -#define W_TCB_TIMESTAMP 15 -#define S_TCB_TIMESTAMP 11 -#define M_TCB_TIMESTAMP 0xffffffffULL -#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) - -#define W_TCB_TIMESTAMP_OFFSET 16 -#define S_TCB_TIMESTAMP_OFFSET 11 -#define M_TCB_TIMESTAMP_OFFSET 0xfULL -#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) - -#define W_TCB_TX_MAX 16 -#define S_TCB_TX_MAX 15 -#define M_TCB_TX_MAX 0xffffffffULL -#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) - -#define W_TCB_TX_HDR_PTR_RAW 17 -#define S_TCB_TX_HDR_PTR_RAW 15 -#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL -#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) - -#define W_TCB_TX_LAST_PTR_RAW 18 -#define S_TCB_TX_LAST_PTR_RAW 0 -#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL -#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) - -#define W_TCB_TX_COMPACT 18 -#define S_TCB_TX_COMPACT 17 -#define M_TCB_TX_COMPACT 0x1ULL -#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) - -#define W_TCB_RX_COMPACT 18 -#define S_TCB_RX_COMPACT 18 -#define M_TCB_RX_COMPACT 0x1ULL -#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) - -#define W_TCB_RCV_WND 18 -#define S_TCB_RCV_WND 19 -#define M_TCB_RCV_WND 0x7ffffffULL -#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) - -#define W_TCB_RX_HDR_OFFSET 19 -#define S_TCB_RX_HDR_OFFSET 14 -#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL -#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) - -#define W_TCB_RX_FRAG0_START_IDX_RAW 20 -#define S_TCB_RX_FRAG0_START_IDX_RAW 9 -#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) - -#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 -#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 -#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL -#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) - -#define W_TCB_RX_FRAG0_LEN 21 -#define S_TCB_RX_FRAG0_LEN 31 -#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) - -#define W_TCB_RX_FRAG1_LEN 22 -#define S_TCB_RX_FRAG1_LEN 26 -#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) - -#define W_TCB_NEWRENO_RECOVER 23 -#define S_TCB_NEWRENO_RECOVER 21 -#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL -#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) - -#define W_TCB_PDU_HAVE_LEN 24 -#define S_TCB_PDU_HAVE_LEN 16 -#define M_TCB_PDU_HAVE_LEN 0x1ULL -#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) - -#define W_TCB_PDU_LEN 24 -#define S_TCB_PDU_LEN 17 -#define M_TCB_PDU_LEN 0xffffULL -#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) - -#define W_TCB_RX_QUIESCE 25 -#define S_TCB_RX_QUIESCE 1 -#define M_TCB_RX_QUIESCE 0x1ULL -#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) - -#define W_TCB_RX_PTR_RAW 25 -#define S_TCB_RX_PTR_RAW 2 -#define M_TCB_RX_PTR_RAW 0x1ffffULL -#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) - -#define W_TCB_CPU_NO 25 -#define S_TCB_CPU_NO 19 -#define M_TCB_CPU_NO 0x7fULL -#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) - -#define W_TCB_ULP_TYPE 25 -#define S_TCB_ULP_TYPE 26 -#define M_TCB_ULP_TYPE 0xfULL -#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) - -#define W_TCB_RX_FRAG1_PTR_RAW 25 -#define S_TCB_RX_FRAG1_PTR_RAW 30 -#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) - -#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 -#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 -#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) - -#define W_TCB_RX_FRAG2_PTR_RAW 27 -#define S_TCB_RX_FRAG2_PTR_RAW 10 -#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) - -#define W_TCB_RX_FRAG2_LEN_RAW 27 -#define S_TCB_RX_FRAG2_LEN_RAW 27 -#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) - -#define W_TCB_RX_FRAG3_PTR_RAW 28 -#define S_TCB_RX_FRAG3_PTR_RAW 22 -#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) - -#define W_TCB_RX_FRAG3_LEN_RAW 29 -#define S_TCB_RX_FRAG3_LEN_RAW 7 -#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) - -#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 -#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 -#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) - -#define W_TCB_PDU_HDR_LEN 30 -#define S_TCB_PDU_HDR_LEN 29 -#define M_TCB_PDU_HDR_LEN 0xffULL -#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) - -#define W_TCB_SLUSH1 31 -#define S_TCB_SLUSH1 5 -#define M_TCB_SLUSH1 0x7ffffULL -#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) - -#define W_TCB_ULP_RAW 31 -#define S_TCB_ULP_RAW 24 -#define M_TCB_ULP_RAW 0xffULL -#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) - -#define W_TCB_DDP_RDMAP_VERSION 25 -#define S_TCB_DDP_RDMAP_VERSION 30 -#define M_TCB_DDP_RDMAP_VERSION 0x1ULL -#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) - -#define W_TCB_MARKER_ENABLE_RX 25 -#define S_TCB_MARKER_ENABLE_RX 31 -#define M_TCB_MARKER_ENABLE_RX 0x1ULL -#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) - -#define W_TCB_MARKER_ENABLE_TX 26 -#define S_TCB_MARKER_ENABLE_TX 0 -#define M_TCB_MARKER_ENABLE_TX 0x1ULL -#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) - -#define W_TCB_CRC_ENABLE 26 -#define S_TCB_CRC_ENABLE 1 -#define M_TCB_CRC_ENABLE 0x1ULL -#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) - -#define W_TCB_IRS_ULP 26 -#define S_TCB_IRS_ULP 2 -#define M_TCB_IRS_ULP 0x1ffULL -#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) - -#define W_TCB_ISS_ULP 26 -#define S_TCB_ISS_ULP 11 -#define M_TCB_ISS_ULP 0x1ffULL -#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) - -#define W_TCB_TX_PDU_LEN 26 -#define S_TCB_TX_PDU_LEN 20 -#define M_TCB_TX_PDU_LEN 0x3fffULL -#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) - -#define W_TCB_TX_PDU_OUT 27 -#define S_TCB_TX_PDU_OUT 2 -#define M_TCB_TX_PDU_OUT 0x1ULL -#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) - -#define W_TCB_CQ_IDX_SQ 27 -#define S_TCB_CQ_IDX_SQ 3 -#define M_TCB_CQ_IDX_SQ 0xffffULL -#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) - -#define W_TCB_CQ_IDX_RQ 27 -#define S_TCB_CQ_IDX_RQ 19 -#define M_TCB_CQ_IDX_RQ 0xffffULL -#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) - -#define W_TCB_QP_ID 28 -#define S_TCB_QP_ID 3 -#define M_TCB_QP_ID 0xffffULL -#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) - -#define W_TCB_PD_ID 28 -#define S_TCB_PD_ID 19 -#define M_TCB_PD_ID 0xffffULL -#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) - -#define W_TCB_STAG 29 -#define S_TCB_STAG 3 -#define M_TCB_STAG 0xffffffffULL -#define V_TCB_STAG(x) ((x) << S_TCB_STAG) - -#define W_TCB_RQ_START 30 -#define S_TCB_RQ_START 3 -#define M_TCB_RQ_START 0x3ffffffULL -#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) - -#define W_TCB_RQ_MSN 30 -#define S_TCB_RQ_MSN 29 -#define M_TCB_RQ_MSN 0x3ffULL -#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) - -#define W_TCB_RQ_MAX_OFFSET 31 -#define S_TCB_RQ_MAX_OFFSET 7 -#define M_TCB_RQ_MAX_OFFSET 0xfULL -#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) - -#define W_TCB_RQ_WRITE_PTR 31 -#define S_TCB_RQ_WRITE_PTR 11 -#define M_TCB_RQ_WRITE_PTR 0x3ffULL -#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) - -#define W_TCB_INB_WRITE_PERM 31 -#define S_TCB_INB_WRITE_PERM 21 -#define M_TCB_INB_WRITE_PERM 0x1ULL -#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) - -#define W_TCB_INB_READ_PERM 31 -#define S_TCB_INB_READ_PERM 22 -#define M_TCB_INB_READ_PERM 0x1ULL -#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) - -#define W_TCB_ORD_L_BIT_VLD 31 -#define S_TCB_ORD_L_BIT_VLD 23 -#define M_TCB_ORD_L_BIT_VLD 0x1ULL -#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) - -#define W_TCB_RDMAP_OPCODE 31 -#define S_TCB_RDMAP_OPCODE 24 -#define M_TCB_RDMAP_OPCODE 0xfULL -#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) - -#define W_TCB_TX_FLUSH 31 -#define S_TCB_TX_FLUSH 28 -#define M_TCB_TX_FLUSH 0x1ULL -#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) - -#define W_TCB_TX_OOS_RXMT 31 -#define S_TCB_TX_OOS_RXMT 29 -#define M_TCB_TX_OOS_RXMT 0x1ULL -#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) - -#define W_TCB_TX_OOS_TXMT 31 -#define S_TCB_TX_OOS_TXMT 30 -#define M_TCB_TX_OOS_TXMT 0x1ULL -#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) - -#define W_TCB_SLUSH_AUX2 31 -#define S_TCB_SLUSH_AUX2 31 -#define M_TCB_SLUSH_AUX2 0x1ULL -#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) - -#define W_TCB_RX_FRAG1_PTR_RAW2 25 -#define S_TCB_RX_FRAG1_PTR_RAW2 30 -#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) - -#define W_TCB_RX_DDP_FLAGS 26 -#define S_TCB_RX_DDP_FLAGS 15 -#define M_TCB_RX_DDP_FLAGS 0x3ffULL -#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) - -#define W_TCB_SLUSH_AUX3 26 -#define S_TCB_SLUSH_AUX3 31 -#define M_TCB_SLUSH_AUX3 0x1ffULL -#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) - -#define W_TCB_RX_DDP_BUF0_OFFSET 27 -#define S_TCB_RX_DDP_BUF0_OFFSET 8 -#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) - -#define W_TCB_RX_DDP_BUF0_LEN 27 -#define S_TCB_RX_DDP_BUF0_LEN 30 -#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) - -#define W_TCB_RX_DDP_BUF1_OFFSET 28 -#define S_TCB_RX_DDP_BUF1_OFFSET 20 -#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) - -#define W_TCB_RX_DDP_BUF1_LEN 29 -#define S_TCB_RX_DDP_BUF1_LEN 10 -#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) - -#define W_TCB_RX_DDP_BUF0_TAG 30 -#define S_TCB_RX_DDP_BUF0_TAG 0 -#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) - -#define W_TCB_RX_DDP_BUF1_TAG 31 -#define S_TCB_RX_DDP_BUF1_TAG 0 -#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) - -#define S_TF_DACK 10 -#define V_TF_DACK(x) ((x) << S_TF_DACK) - -#define S_TF_NAGLE 11 -#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) - -#define S_TF_RECV_SCALE 12 -#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) - -#define S_TF_RECV_TSTMP 13 -#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) - -#define S_TF_RECV_SACK 14 -#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) - -#define S_TF_TURBO 15 -#define V_TF_TURBO(x) ((x) << S_TF_TURBO) - -#define S_TF_KEEPALIVE 16 -#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) - -#define S_TF_TCAM_BYPASS 17 -#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) - -#define S_TF_CORE_FIN 18 -#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) - -#define S_TF_CORE_MORE 19 -#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) - -#define S_TF_MIGRATING 20 -#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) - -#define S_TF_ACTIVE_OPEN 21 -#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) - -#define S_TF_ASK_MODE 22 -#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) - -#define S_TF_NON_OFFLOAD 23 -#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) - -#define S_TF_MOD_SCHD 24 -#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) - -#define S_TF_MOD_SCHD_REASON0 25 -#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) - -#define S_TF_MOD_SCHD_REASON1 26 -#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) - -#define S_TF_MOD_SCHD_RX 27 -#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) - -#define S_TF_CORE_PUSH 28 -#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) - -#define S_TF_RCV_COALESCE_ENABLE 29 -#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) - -#define S_TF_RCV_COALESCE_PUSH 30 -#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) - -#define S_TF_RCV_COALESCE_LAST_PSH 31 -#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) - -#define S_TF_RCV_COALESCE_HEARTBEAT 32 -#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) - -#define S_TF_HALF_CLOSE 33 -#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) - -#define S_TF_DACK_MSS 34 -#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) - -#define S_TF_CCTRL_SEL0 35 -#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) - -#define S_TF_CCTRL_SEL1 36 -#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) - -#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 -#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) - -#define S_TF_TX_PACE_AUTO 38 -#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) - -#define S_TF_PEER_FIN_HELD 39 -#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) - -#define S_TF_CORE_URG 40 -#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) - -#define S_TF_RDMA_ERROR 41 -#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) - -#define S_TF_SSWS_DISABLED 42 -#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) - -#define S_TF_DUPACK_COUNT_ODD 43 -#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) - -#define S_TF_TX_CHANNEL 44 -#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) - -#define S_TF_RX_CHANNEL 45 -#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) - -#define S_TF_TX_PACE_FIXED 46 -#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) - -#define S_TF_RDMA_FLM_ERROR 47 -#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) - -#define S_TF_RX_FLOW_CONTROL_DISABLE 48 -#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) - -#endif /* _TCB_DEFS_H */ diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e87fc0408470..ee1182f9b627 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -2424,20 +2422,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; pr_debug("ep %p tid %u\n", ep, ep->hwtid); - - skb_get(skb); - rpl = cplhdr(skb); - if (!is_t4(adapter_type)) { - skb_trim(skb, roundup(sizeof(*rpl5), 16)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); - cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); @@ -2483,6 +2467,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } + + skb_get(skb); + rpl = cplhdr(skb); + if (!is_t4(adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; @@ -3381,7 +3379,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ @@ -3403,7 +3401,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..fe3a7e8561df 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } @@ -537,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(udata, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d373ac0fe2cb..ba83d942997c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + int ret = 0; pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); props->port_cap_flags = IB_PORT_CM_SUP | @@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; - return 0; + return ret; } static ssize_t hw_rev_show(struct device *dev, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2283e432693e..aa7396a1588a 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -60,8 +60,6 @@ struct efa_dev { u64 mem_bar_len; u64 db_bar_addr; u64 db_bar_len; - u8 addr[EFA_GID_SIZE]; - u32 mtu; int admin_msix_vector_idx; struct efa_irq admin_irq; @@ -71,8 +69,6 @@ struct efa_dev { struct efa_ucontext { struct ib_ucontext ibucontext; - struct xarray mmap_xa; - u32 mmap_xa_page; u16 uarn; }; @@ -91,6 +87,7 @@ struct efa_cq { struct efa_ucontext *ucontext; dma_addr_t dma_addr; void *cpu_addr; + struct rdma_user_mmap_entry *mmap_entry; size_t size; u16 cq_idx; }; @@ -101,6 +98,13 @@ struct efa_qp { void *rq_cpu_addr; size_t rq_size; enum ib_qp_state state; + + /* Used for saving mmap_xa entries */ + struct rdma_user_mmap_entry *sq_db_mmap_entry; + struct rdma_user_mmap_entry *llq_desc_mmap_entry; + struct rdma_user_mmap_entry *rq_db_mmap_entry; + struct rdma_user_mmap_entry *rq_mmap_entry; + u32 qp_handle; u32 max_send_wr; u32 max_recv_wr; @@ -147,6 +151,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 2be0469d545f..e96bcb16bd2b 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -362,9 +362,13 @@ struct efa_admin_reg_mr_cmd { /* * permissions - * 0 : local_write_enable - Write permissions: value - * of 1 needed for RQ buffers and for RDMA write - * 7:1 : reserved1 - remote access flags, etc + * 0 : local_write_enable - Local write permissions: + * must be set for RQ buffers and buffers posted for + * RDMA Read requests + * 1 : reserved1 - MBZ + * 2 : remote_read_enable - Remote read permissions: + * must be set to enable RDMA read from the region + * 7:3 : reserved2 - MBZ */ u8 permissions; @@ -558,6 +562,16 @@ struct efa_admin_feature_device_attr_desc { /* Indicates how many bits are used virtual address access */ u8 virt_addr_width; + + /* + * 0 : rdma_read - If set, RDMA Read is supported on + * TX queues + * 31:1 : reserved - MBZ + */ + u32 device_caps; + + /* Max RDMA transfer size in bytes */ + u32 max_rdma_size; }; struct efa_admin_feature_queue_attr_desc { @@ -604,6 +618,9 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum size of LLQ in bytes */ u32 max_llq_size; + + /* Maximum number of SGEs for a single RDMA read WQE */ + u16 max_wr_rdma_sges; }; struct efa_admin_feature_aenq_desc { @@ -618,6 +635,7 @@ struct efa_admin_feature_network_attr_desc { /* Raw address data in network byte order */ u8 addr[16]; + /* max packet payload size in bytes */ u32 mtu; }; @@ -780,6 +798,8 @@ struct efa_admin_mmio_req_read_less_resp { #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 @@ -791,4 +811,7 @@ struct efa_admin_mmio_req_read_less_resp { /* get_set_feature_common_desc */ #define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) +/* feature_device_attr_desc */ +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) + #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 3c412bc5b94f..0778f4f7dccd 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -317,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu struct efa_admin_acq_entry *comp, size_t comp_size_in_bytes) { + struct efa_admin_aq_entry *aqe; struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; @@ -350,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu reinit_completion(&comp_ctx->wait_event); - memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + aqe = &aq->sq.entries[pi]; + memset(aqe, 0, sizeof(*aqe)); + memcpy(aqe, cmd, cmd_size_in_bytes); aq->sq.pc++; atomic64_inc(&aq->stats.submitted_cmd); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index c079f1332082..e20bd84a1014 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -230,8 +230,7 @@ int efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.flags |= params->page_shift & EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; mr_cmd.iova = params->iova; - mr_cmd.permissions |= params->permissions & - EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + mr_cmd.permissions = params->permissions; if (params->inline_pbl) { memcpy(mr_cmd.pbl.inline_pbl_array, @@ -423,28 +422,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev, return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); } -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result) -{ - struct efa_admin_get_feature_resp resp; - int err; - - err = efa_com_get_feature(edev, &resp, - EFA_ADMIN_NETWORK_ATTR); - if (err) { - ibdev_err_ratelimited(edev->efa_dev, - "Failed to get network attributes %d\n", - err); - return err; - } - - memcpy(result->addr, resp.u.network_attr.addr, - sizeof(resp.u.network_attr.addr)); - result->mtu = resp.u.network_attr.mtu; - - return 0; -} - int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result) { @@ -467,6 +444,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->phys_addr_width = resp.u.device_attr.phys_addr_width; result->virt_addr_width = resp.u.device_attr.virt_addr_width; result->db_bar = resp.u.device_attr.db_bar; + result->max_rdma_size = resp.u.device_attr.max_rdma_size; + result->device_caps = resp.u.device_attr.device_caps; if (result->admin_api_version < 1) { ibdev_err_ratelimited( @@ -500,6 +479,19 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_ah = resp.u.queue_attr.max_ah; result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get network attributes %d\n", + err); + return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + result->mtu = resp.u.network_attr.mtu; return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 7f6c13052f49..31db5a0cbd5b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params { u16 pdn; }; -struct efa_com_get_network_attr_result { - u8 addr[EFA_GID_SIZE]; - u32 mtu; -}; - struct efa_com_get_device_attr_result { + u8 addr[EFA_GID_SIZE]; u64 page_size_cap; u64 max_mr_pages; + u32 mtu; u32 fw_version; u32 admin_api_version; u32 device_version; @@ -124,9 +121,12 @@ struct efa_com_get_device_attr_result { u32 max_pd; u32 max_ah; u32 max_llq_size; + u32 max_rdma_size; + u32 device_caps; u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; + u16 max_wr_rdma_sge; u8 db_bar; }; @@ -181,12 +181,7 @@ struct efa_com_reg_mr_params { * address mapping */ u8 page_shift; - /* - * permissions - * 0: local_write_enable - Write permissions: value of 1 needed - * for RQ buffers and for RDMA write:1: reserved1 - remote - * access flags, etc - */ + /* see permissions field of struct efa_admin_reg_mr_cmd */ u8 permissions; u8 inline_pbl; u8 indirect; @@ -271,8 +266,6 @@ int efa_com_create_ah(struct efa_com_dev *edev, struct efa_com_create_ah_result *result); int efa_com_destroy_ah(struct efa_com_dev *edev, struct efa_com_destroy_ah_params *params); -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result); int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 83858f7e83d0..faf3ff1bca2a 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -30,15 +30,6 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -static void efa_update_network_attr(struct efa_dev *dev, - struct efa_com_get_network_attr_result *network_attr) -{ - memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); - dev->mtu = network_attr->mtu; - - dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); -} - /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -217,6 +208,7 @@ static const struct ib_device_ops efa_dev_ops = { .get_link_layer = efa_port_link_layer, .get_port_immutable = efa_get_port_immutable, .mmap = efa_mmap, + .mmap_free = efa_mmap_free, .modify_qp = efa_modify_qp, .query_device = efa_query_device, .query_gid = efa_query_gid, @@ -233,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = { static int efa_ib_device_add(struct efa_dev *dev) { - struct efa_com_get_network_attr_result network_attr; struct efa_com_get_hw_hints_result hw_hints; struct pci_dev *pdev = dev->pdev; int err; @@ -249,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) return err; - err = efa_com_get_network_attr(&dev->edev, &network_attr); - if (err) - goto err_release_doorbell_bar; - - efa_update_network_attr(dev, &network_attr); - err = efa_com_get_hw_hints(&dev->edev, &hw_hints); if (err) goto err_release_doorbell_bar; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3c..c9d294caa27a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -13,10 +13,6 @@ #include "efa.h" -#define EFA_MMAP_FLAG_SHIFT 56 -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) -#define EFA_MMAP_INVALID U64_MAX - enum { EFA_MMAP_DMA_PAGE = 0, EFA_MMAP_IO_WC, @@ -27,20 +23,12 @@ enum { (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -struct efa_mmap_entry { - void *obj; +struct efa_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; u64 address; - u64 length; - u32 mmap_page; u8 mmap_flag; }; -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{ - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | - ((u64)efa->mmap_page << PAGE_SHIFT); -} - #define EFA_DEFINE_STATS(op) \ op(EFA_TX_BYTES, "tx_bytes") \ op(EFA_TX_PKTS, "tx_pkts") \ @@ -82,8 +70,6 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE - struct pbl_chunk { dma_addr_t dma_addr; u64 *buf; @@ -147,6 +133,17 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah) return container_of(ibah, struct efa_ah, ibah); } +static inline struct efa_user_mmap_entry * +to_emmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); +} + +static inline bool is_rdma_read_cap(struct efa_dev *dev) +{ + return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -172,106 +169,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } -/* - * This is only called when the ucontext is destroyed and there can be no - * concurrent query via mmap or allocate on the xarray, thus we can be sure no - * other thread is using the entry pointer. We also know that all the BAR - * pages have either been zap'd or munmaped at this point. Normal pages are - * refcounted and will be freed at the proper time. - */ -static void mmap_entries_remove_free(struct efa_dev *dev, - struct efa_ucontext *ucontext) -{ - struct efa_mmap_entry *entry; - unsigned long mmap_page; - - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { - xa_erase(&ucontext->mmap_xa, mmap_page); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, get_mmap_key(entry), entry->address, - entry->length); - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - /* DMA mapping is already gone, now free the pages */ - free_pages_exact(phys_to_virt(entry->address), - entry->length); - kfree(entry); - } -} - -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, - struct efa_ucontext *ucontext, - u64 key, u64 len) -{ - struct efa_mmap_entry *entry; - u64 mmap_page; - - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; - if (mmap_page > U32_MAX) - return NULL; - - entry = xa_load(&ucontext->mmap_xa, mmap_page); - if (!entry || get_mmap_key(entry) != key || entry->length != len) - return NULL; - - ibdev_dbg(&dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, key, entry->address, entry->length); - - return entry; -} - -/* - * Note this locking scheme cannot support removal of entries, except during - * ucontext destruction when the core code guarentees no concurrency. - */ -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, - void *obj, u64 address, u64 length, u8 mmap_flag) -{ - struct efa_mmap_entry *entry; - u32 next_mmap_page; - int err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return EFA_MMAP_INVALID; - - entry->obj = obj; - entry->address = address; - entry->length = length; - entry->mmap_flag = mmap_flag; - - xa_lock(&ucontext->mmap_xa); - if (check_add_overflow(ucontext->mmap_xa_page, - (u32)(length >> PAGE_SHIFT), - &next_mmap_page)) - goto err_unlock; - - entry->mmap_page = ucontext->mmap_xa_page; - ucontext->mmap_xa_page = next_mmap_page; - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, - GFP_KERNEL); - if (err) - goto err_unlock; - - xa_unlock(&ucontext->mmap_xa); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", - entry->obj, entry->address, entry->length, get_mmap_key(entry)); - - return get_mmap_key(entry); - -err_unlock: - xa_unlock(&ucontext->mmap_xa); - kfree(entry); - return EFA_MMAP_INVALID; - -} - int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -306,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev, dev_attr->max_rq_depth); props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; + props->max_sge_rd = dev_attr->max_wr_rdma_sge; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; resp.max_rq_sge = dev_attr->max_rq_sge; resp.max_sq_wr = dev_attr->max_sq_depth; resp.max_rq_wr = dev_attr->max_rq_depth; + resp.max_rdma_size = dev_attr->max_rdma_size; + + if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); @@ -338,9 +240,9 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; props->active_width = IB_WIDTH_4X; - props->max_mtu = ib_mtu_int_to_enum(dev->mtu); - props->active_mtu = ib_mtu_int_to_enum(dev->mtu); - props->max_msg_sz = dev->mtu; + props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->max_msg_sz = dev->dev_attr.mtu; props->max_vl_num = 1; return 0; @@ -401,7 +303,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, { struct efa_dev *dev = to_edev(ibdev); - memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); return 0; } @@ -485,8 +387,19 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) return efa_com_destroy_qp(&dev->edev, ¶ms); } +static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, + struct efa_qp *qp) +{ + rdma_user_mmap_entry_remove(qp->rq_mmap_entry); + rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); + rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); +} + int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@ -505,61 +418,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) DMA_TO_DEVICE); } + efa_qp_user_mmap_entries_remove(ucontext, qp); kfree(qp); return 0; } +static struct rdma_user_mmap_entry* +efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, + u64 address, size_t length, + u8 mmap_flag, u64 *offset) +{ + struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int err; + + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_flag = mmap_flag; + + err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, + length); + if (err) { + kfree(entry); + return NULL; + } + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + static int qp_mmap_entries_setup(struct efa_qp *qp, struct efa_dev *dev, struct efa_ucontext *ucontext, struct efa_com_create_qp_params *params, struct efa_ibv_create_qp_resp *resp) { - /* - * Once an entry is inserted it might be mmapped, hence cannot be - * cleaned up until dealloc_ucontext. - */ - resp->sq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->sq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + size_t length; + u64 address; + + address = dev->db_bar_addr + resp->sq_db_offset; + qp->sq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->sq_db_mmap_key); + if (!qp->sq_db_mmap_entry) return -ENOMEM; resp->sq_db_offset &= ~PAGE_MASK; - resp->llq_desc_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->mem_bar_addr + resp->llq_desc_offset, - PAGE_ALIGN(params->sq_ring_size_in_bytes + - (resp->llq_desc_offset & ~PAGE_MASK)), - EFA_MMAP_IO_WC); - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->mem_bar_addr + resp->llq_desc_offset; + length = PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)); + + qp->llq_desc_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, length, + EFA_MMAP_IO_WC, + &resp->llq_desc_mmap_key); + if (!qp->llq_desc_mmap_entry) + goto err_remove_mmap; resp->llq_desc_offset &= ~PAGE_MASK; if (qp->rq_size) { - resp->rq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->rq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->db_bar_addr + resp->rq_db_offset; + + qp->rq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, PAGE_SIZE, + EFA_MMAP_IO_NC, + &resp->rq_db_mmap_key); + if (!qp->rq_db_mmap_entry) + goto err_remove_mmap; resp->rq_db_offset &= ~PAGE_MASK; - resp->rq_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - virt_to_phys(qp->rq_cpu_addr), - qp->rq_size, EFA_MMAP_DMA_PAGE); - if (resp->rq_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = virt_to_phys(qp->rq_cpu_addr); + qp->rq_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, qp->rq_size, + EFA_MMAP_DMA_PAGE, + &resp->rq_mmap_key); + if (!qp->rq_mmap_entry) + goto err_remove_mmap; resp->rq_mmap_size = qp->rq_size; } return 0; + +err_remove_mmap: + efa_qp_user_mmap_entries_remove(ucontext, qp); + + return -ENOMEM; } static int efa_qp_validate_cap(struct efa_dev *dev, @@ -634,7 +587,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct efa_dev *dev = to_edev(ibpd->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; - bool rq_entry_inserted = false; struct efa_ucontext *ucontext; struct efa_qp *qp; int err; @@ -742,7 +694,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_destroy_qp; - rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; qp->ibqp.qp_type = init_attr->qp_type; @@ -759,7 +710,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, ibdev_dbg(&dev->ibdev, "Failed to copy udata for qp[%u]\n", create_qp_resp.qp_num); - goto err_destroy_qp; + goto err_remove_mmap_entries; } } @@ -767,13 +718,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, return &qp->ibqp; +err_remove_mmap_entries: + efa_qp_user_mmap_entries_remove(ucontext, qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: if (qp->rq_size) { dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, DMA_TO_DEVICE); - if (!rq_entry_inserted) + + if (!qp->rq_mmap_entry) free_pages_exact(qp->rq_cpu_addr, qp->rq_size); } err_free_qp: @@ -897,16 +851,18 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + rdma_user_mmap_entry_remove(cq->mmap_entry); } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, struct efa_ibv_create_cq_resp *resp) { resp->q_mmap_size = cq->size; - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, - virt_to_phys(cq->cpu_addr), - cq->size, EFA_MMAP_DMA_PAGE); - if (resp->q_mmap_key == EFA_MMAP_INVALID) + cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE, + &resp->q_mmap_key); + if (!cq->mmap_entry) return -ENOMEM; return 0; @@ -924,7 +880,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); - bool cq_entry_inserted = false; int entries = attr->cqe; int err; @@ -1013,15 +968,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_destroy_cq; } - cq_entry_inserted = true; - if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_destroy_cq; + goto err_remove_mmap; } } @@ -1030,13 +983,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_remove_mmap: + rdma_user_mmap_entry_remove(cq->mmap_entry); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - if (!cq_entry_inserted) + if (!cq->mmap_entry) free_pages_exact(cq->cpu_addr, cq->size); + err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); return err; @@ -1396,6 +1352,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; struct pbl_context pbl; + int supp_access_flags; unsigned int pg_sz; struct efa_mr *mr; int inline_size; @@ -1409,10 +1366,14 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + supp_access_flags = + IB_ACCESS_LOCAL_WRITE | + (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + + if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", - access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + access_flags, supp_access_flags); err = -EOPNOTSUPP; goto err_out; } @@ -1423,7 +1384,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, @@ -1434,7 +1395,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; params.mr_length_in_bytes = length; - params.permissions = access_flags & 0x1; + params.permissions = access_flags; pg_sz = ib_umem_find_best_pgsz(mr->umem, dev->dev_attr.page_size_cap, @@ -1556,7 +1517,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) goto err_out; ucontext->uarn = result.uarn; - xa_init(&ucontext->mmap_xa); resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; @@ -1585,38 +1545,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - mmap_entries_remove_free(dev, ucontext); efa_dealloc_uar(dev, ucontext->uarn); } +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); + + /* DMA mapping is already gone, now free the pages */ + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + free_pages_exact(phys_to_virt(entry->address), + entry->rdma_entry.npages * PAGE_SIZE); + kfree(entry); +} + static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, - struct vm_area_struct *vma, u64 key, u64 length) + struct vm_area_struct *vma) { - struct efa_mmap_entry *entry; + struct rdma_user_mmap_entry *rdma_entry; + struct efa_user_mmap_entry *entry; unsigned long va; + int err = 0; u64 pfn; - int err; - entry = mmap_entry_get(dev, ucontext, key, length); - if (!entry) { - ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", - key); + rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, + "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } + entry = to_emmap(rdma_entry); ibdev_dbg(&dev->ibdev, - "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", - entry->address, length, entry->mmap_flag); + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag); pfn = entry->address >> PAGE_SHIFT; switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_IO_WC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; @@ -1633,12 +1611,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, if (err) { ibdev_dbg( &dev->ibdev, - "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", - entry->address, length, entry->mmap_flag, err); - return err; + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag, err); } - return 0; + rdma_user_mmap_entry_put(rdma_entry); + return err; } int efa_mmap(struct ib_ucontext *ibucontext, @@ -1646,26 +1625,13 @@ int efa_mmap(struct ib_ucontext *ibucontext, { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - u64 length = vma->vm_end - vma->vm_start; - u64 key = vma->vm_pgoff << PAGE_SHIFT; + size_t length = vma->vm_end - vma->vm_start; ibdev_dbg(&dev->ibdev, - "start %#lx, end %#lx, length = %#llx, key = %#llx\n", - vma->vm_start, vma->vm_end, length, key); - - if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { - ibdev_dbg(&dev->ibdev, - "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", - length, PAGE_SIZE, vma->vm_flags); - return -EINVAL; - } - - if (vma->vm_flags & VM_EXEC) { - ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); - return -EPERM; - } + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - return __efa_mmap(dev, ucontext, vma, key, length); + return __efa_mmap(dev, ucontext, vma); } static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f9a7e9d29c8b..7c5e3fb22413 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | HFI1_CAP_KGET_MASK(uctxt->flags, K2U); /* adjust flag if this fd is not able to cache */ - if (!fd->handler) + if (!fd->use_mn) cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ cinfo.num_active = hfi1_count_active_units(); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fa45350a9a1d..fc10d65fc3e1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1444,7 +1444,7 @@ struct hfi1_filedata { /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; - struct mmu_rb_handler *handler; + bool use_mn; struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 71cb9525c074..26b792bb1027 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void) goto bail_dev; } - hfi1_compute_tid_rdma_flow_wt(); /* * These must be called before the driver is registered with * the PCI subsystem. diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index d8ff063a5419..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4915,16 +4915,11 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: - if (unlikely(in_mad_size != sizeof(struct opa_mad))) { - dev_err(ibdev->dev.parent, "invalid in_mad_size\n"); - return IB_MAD_RESULT_FAILURE; - } return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, (struct opa_mad *)in_mad, @@ -4932,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 61aa5504d7c3..61362bd6d3ce 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbf7faa5038c..36593f2efe26 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, u32 config_data, const char *message) { u8 i; - int ret = HCMD_SUCCESS; + int ret; for (i = 0; i < 4; i++) { ret = load_8051_config(ppd->dd, field_id, i, config_data); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 513a8aac9ccd..1a3c647675a7 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_flags & RVT_S_WAIT_RNR) goto bail_stop; rdi = ib_to_rvt(qp->ibqp.device); - if (qp->s_rnr_retry == 0 && - !((rdi->post_parms[wqe->wr.opcode].flags & - RVT_OPERATION_IGN_RNR_CNT) && - qp->s_rnr_retry_cnt == 0)) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; + if (!(rdi->post_parms[wqe->wr.opcode].flags & + RVT_OPERATION_IGN_RNR_CNT)) { + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) + qp->s_rnr_retry--; } - if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) - qp->s_rnr_retry--; /* * The last valid PSN is the previous PSN. For TID RDMA WRITE diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..5774dfc22e18 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -65,6 +65,7 @@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32)) static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@ -880,8 +881,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, - sdma_rht_params); + rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, + sdma_rht_params); if (rht_node && rht_node->map[vl]) { struct sdma_rht_map_elem *map = rht_node->map[vl]; @@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) struct sdma_engine *sde; if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } /* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); @@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index b4dcc4d29f84..e53f542b60af 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -107,8 +107,6 @@ static u32 mask_generation(u32 a) * C - Capcode */ -static u32 tid_rdma_flow_wt; - static void tid_rdma_trigger_resume(struct work_struct *work); static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, @@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet, struct tid_rdma_flow *flow, bool fecn); +static void validate_r_tid_ack(struct hfi1_qp_priv *priv) +{ + if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) + priv->r_tid_ack = priv->r_tid_tail; +} + +static void tid_rdma_schedule_ack(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + priv->s_flags |= RVT_S_ACK_PENDING; + hfi1_schedule_tid_send(qp); +} + +static void tid_rdma_trigger_ack(struct rvt_qp *qp) +{ + validate_r_tid_ack(qp->priv); + tid_rdma_schedule_ack(qp); +} + static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { return @@ -2736,11 +2754,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, diff = cmp_psn(psn, flow->flow_state.r_next_psn); if (diff > 0) { - if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) - restart_tid_rdma_read_req(rcd, - qp, - wqe); - /* Drop the packet.*/ goto s_unlock; } else if (diff < 0) { @@ -3010,10 +3023,7 @@ nak_psn: qpriv->s_nak_state = IB_NAK_PSN_ERROR; /* We are NAK'ing the next expected PSN */ qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); - qpriv->s_flags |= RVT_S_ACK_PENDING; - if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID) - qpriv->r_tid_ack = qpriv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto unlock; } @@ -3376,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); } -void hfi1_compute_tid_rdma_flow_wt(void) +static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp) { /* * Heuristic for computing the RNR timeout when waiting on the flow * queue. Rather than a computationaly expensive exact estimate of when * a flow will be available, we assume that if a QP is at position N in * the flow queue it has to wait approximately (N + 1) * (number of - * segments between two sync points), assuming PMTU of 4K. The rationale - * for this is that flows are released and recycled at each sync point. + * segments between two sync points). The rationale for this is that + * flows are released and recycled at each sync point. */ - tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / - TID_RDMA_MAX_SEGMENT_SIZE; + return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT; } static u32 position_in_queue(struct hfi1_qp_priv *qpriv, @@ -3510,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); if (ret) { - to_seg = tid_rdma_flow_wt * + to_seg = hfi1_compute_tid_rdma_flow_wt(qp) * position_in_queue(qpriv, &rcd->flow_queue); break; @@ -3531,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) /* * If overtaking req->acked_tail, send an RNR NAK. Because the * QP is not queued in this case, and the issue can only be - * caused due a delay in scheduling the second leg which we + * caused by a delay in scheduling the second leg which we * cannot estimate, we use a rather arbitrary RNR timeout of * (MAX_FLOWS / 2) segments */ @@ -3539,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) MAX_FLOWS)) { ret = -EAGAIN; to_seg = MAX_FLOWS >> 1; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); break; } @@ -4340,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, req); trace_hfi1_tid_write_rsp_rcv_data(qp); - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; + validate_r_tid_ack(priv); if (opcode == TID_OP(WRITE_DATA_LAST)) { release_rdma_sge_mr(e); @@ -4380,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) } done: - priv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_schedule_ack(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; if (fecn) @@ -4393,10 +4399,7 @@ send_nak: if (!priv->s_nak_state) { priv->s_nak_state = IB_NAK_PSN_ERROR; priv->s_nak_psn = flow->flow_state.r_next_psn; - priv->s_flags |= RVT_S_ACK_PENDING; - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto done; } @@ -4944,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->resync = true; /* RESYNC request always gets a TID RDMA ACK. */ qpriv->s_nak_state = 0; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); bail: if (fecn) qp->s_flags |= RVT_S_ECN; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 1c536185261e..6e82df2190b7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -17,6 +17,7 @@ #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) +#define TID_RDMA_SEGMENT_SHIFT 18 /* * Bit definitions for priv->s_flags. @@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); -void hfi1_compute_tid_rdma_flow_wt(void); - void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3592a9ec155e..f05742ac0949 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, u16 pageidx, unsigned int npages); -static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *arg, struct mmu_rb_node *node); -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, struct tid_group *grp, unsigned int start, u16 count, @@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); -static struct mmu_rb_ops tid_rb_ops = { - .insert = tid_rb_insert, - .remove = tid_rb_remove, - .invalidate = tid_rb_invalidate +static const struct mmu_interval_notifier_ops tid_mn_ops = { + .invalidate = tid_rb_invalidate, }; /* @@ -87,7 +85,6 @@ static struct mmu_rb_ops tid_rb_ops = { int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { - struct hfi1_devdata *dd = uctxt->dd; int ret = 0; spin_lock_init(&fd->tid_lock); @@ -109,20 +106,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, fd->entry_to_rb = NULL; return -ENOMEM; } - - /* - * Register MMU notifier callbacks. If the registration - * fails, continue without TID caching for this context. - */ - ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, - dd->pport->hfi1_wq, - &fd->handler); - if (ret) { - dd_dev_info(dd, - "Failed MMU notifier registration %d\n", - ret); - ret = 0; - } + fd->use_mn = true; } /* @@ -139,7 +123,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, * init. */ spin_lock(&fd->tid_lock); - if (uctxt->subctxt_cnt && fd->handler) { + if (uctxt->subctxt_cnt && fd->use_mn) { u16 remainder; fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; @@ -158,18 +142,10 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; - /* - * The notifier would have been removed when the process'es mm - * was freed. - */ - if (fd->handler) { - hfi1_mmu_rb_unregister(fd->handler); - } else { - if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) - unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); - if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) - unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - } + if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); + if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); kfree(fd->invalid_tids); fd->invalid_tids = NULL; @@ -201,7 +177,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, if (mapped) { pci_unmap_single(dd->pcidev, node->dma_addr, - node->mmu.len, PCI_DMA_FROMDEVICE); + node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); pages = &node->pages[idx]; } else { pages = &tidbuf->pages[idx]; @@ -777,8 +753,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, return -EFAULT; } - node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); - node->mmu.len = npages * PAGE_SIZE; + node->fdata = fd; node->phys = page_to_phys(pages[0]); node->npages = npages; node->rcventry = rcventry; @@ -787,23 +762,35 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - if (!fd->handler) - ret = tid_rb_insert(fd, &node->mmu); - else - ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); - - if (ret) { - hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", - node->rcventry, node->mmu.addr, node->phys, ret); - pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, - PCI_DMA_FROMDEVICE); - kfree(node); - return -EFAULT; + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &node->notifier, fd->mm, + tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, + &tid_mn_ops); + if (ret) + goto out_unmap; + /* + * FIXME: This is in the wrong order, the notifier should be + * established before the pages are pinned by pin_rcv_pages. + */ + mmu_interval_read_begin(&node->notifier); } + fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; + hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, - node->mmu.addr, node->phys, phys); + node->notifier.interval_tree.start, node->phys, + phys); return 0; + +out_unmap: + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", + node->rcventry, node->notifier.interval_tree.start, + node->phys, ret); + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, + PCI_DMA_FROMDEVICE); + kfree(node); + return -EFAULT; } static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, @@ -833,10 +820,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, if (grp) *grp = node->grp; - if (!fd->handler) - cacheless_tid_rb_remove(fd, node); - else - hfi1_mmu_rb_remove(fd->handler, &node->mmu); + if (fd->use_mn) + mmu_interval_notifier_remove(&node->notifier); + cacheless_tid_rb_remove(fd, node); return 0; } @@ -847,7 +833,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) struct hfi1_devdata *dd = uctxt->dd; trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, - node->npages, node->mmu.addr, node->phys, + node->npages, + node->notifier.interval_tree.start, node->phys, node->dma_addr); /* @@ -894,30 +881,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, if (!node || node->rcventry != rcventry) continue; + if (fd->use_mn) + mmu_interval_notifier_remove( + &node->notifier); cacheless_tid_rb_remove(fd, node); } } } } -/* - * Always return 0 from this function. A non-zero return indicates that the - * remove operation will be called and that memory should be unpinned. - * However, the driver cannot unpin out from under PSM. Instead, retain the - * memory (by returning 0) and inform PSM that the memory is going away. PSM - * will call back later when it has removed the memory from its list. - */ -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct hfi1_filedata *fdata = arg; - struct hfi1_ctxtdata *uctxt = fdata->uctxt; struct tid_rb_node *node = - container_of(mnode, struct tid_rb_node, mmu); + container_of(mni, struct tid_rb_node, notifier); + struct hfi1_filedata *fdata = node->fdata; + struct hfi1_ctxtdata *uctxt = fdata->uctxt; if (node->freed) - return 0; + return true; - trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, + node->notifier.interval_tree.start, node->rcventry, node->npages, node->dma_addr); node->freed = true; @@ -946,18 +932,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) fdata->invalid_tid_idx++; } spin_unlock(&fdata->invalid_lock); - return 0; -} - -static int tid_rb_insert(void *arg, struct mmu_rb_node *node) -{ - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - u32 base = fdata->uctxt->expected_base; - - fdata->entry_to_rb[tnode->rcventry - base] = tnode; - return 0; + return true; } static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, @@ -968,12 +943,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, fdata->entry_to_rb[tnode->rcventry - base] = NULL; clear_tid_node(fdata, tnode); } - -static void tid_rb_remove(void *arg, struct mmu_rb_node *node) -{ - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - - cacheless_tid_rb_remove(fdata, tnode); -} diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 43b105de1d54..6257eee083a1 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -65,7 +65,8 @@ struct tid_user_buf { }; struct tid_rb_node { - struct mmu_rb_node mmu; + struct mmu_interval_notifier notifier; + struct hfi1_filedata *fdata; unsigned long phys; struct tid_group *grp; u32 rcventry; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 7bff0a1e713d..089e201d7550 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -820,8 +817,8 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, + sde->dd->sdma_pad_phys, extra_bytes); bail_txadd: return ret; @@ -1089,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..b0e9bf7cd150 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index d602b698b57e..4921c1e40ccd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,23 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HNS - bool "HNS RoCE Driver" + tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on (HNS_DSAF && HNS_ENET) || HNS3 ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hip06 and more further ICT SoC based on platform device. + To compile HIP06 or HIP08 driver as module, choose M here. + config INFINIBAND_HNS_HIP06 - tristate "Hisilicon Hip06 Family RoCE support" + bool "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET + depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y) ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v1 + config INFINIBAND_HNS_HIP08 - tristate "Hisilicon Hip08 Family RoCE support" + bool "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 + depends on INFINIBAND_HNS=m || HNS3=y ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. + + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 449a2d81319d..e105945b94a1 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o +ifdef CONFIG_INFINIBAND_HNS_HIP06 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o +endif +ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o +endif diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 90e08c0c332d..8a522e14ef62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); - u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + u16 vlan_id = 0xffff; bool vlan_en = false; int ret; gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_tag < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { vlan_en = true; - vlan_tag |= (rdma_ah_get_sl(ah_attr) & + vlan_id |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = vlan_tag; + ah->av.vlan_id = vlan_id; ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, - ah->av.vlan); + dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, + ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 8c063c598d2a..da574c26e063 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) bitmap->last = 0; *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); @@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 2b6ac646ca9a..1915bacaded0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -115,12 +115,12 @@ enum { enum { /* TPT commands */ - HNS_ROCE_CMD_SW2HW_MPT = 0xd, - HNS_ROCE_CMD_HW2SW_MPT = 0xf, + HNS_ROCE_CMD_CREATE_MPT = 0xd, + HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_SW2HW_CQ = 0x16, - HNS_ROCE_CMD_HW2SW_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQC = 0x16, + HNS_ROCE_CMD_DESTROY_CQC = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, @@ -129,14 +129,14 @@ enum { HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, HNS_ROCE_CMD_2ERR_QP = 0x1e, HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, - HNS_ROCE_CMD_SQD2SQD_QP = 0x38, HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, - HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_CREATE_SRQ = 0x70, HNS_ROCE_CMD_MODIFY_SRQC = 0x72, HNS_ROCE_CMD_QUERY_SRQC = 0x73, - HNS_ROCE_CMD_HW2SW_SRQ = 0x74, + HNS_ROCE_CMD_DESTROY_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 22541d19cd09..af1d8823b3f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,51 +39,8 @@ #include <rdma/hns-abi.h> #include "hns_roce_common.h" -static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) -{ - struct ib_cq *ibcq = &hr_cq->ib_cq; - - ibcq->comp_handler(ibcq, ibcq->cq_context); -} - -static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, - enum hns_roce_event event_type) -{ - struct hns_roce_dev *hr_dev; - struct ib_event event; - struct ib_cq *ibcq; - - ibcq = &hr_cq->ib_cq; - hr_dev = to_hr_dev(ibcq->device); - - if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && - event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && - event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(hr_dev->dev, - "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", - event_type, hr_cq->cqn); - return; - } - - if (ibcq->event_handler) { - event.device = ibcq->device; - event.event = IB_EVENT_CQ_ERR; - event.element.cq = ibcq; - ibcq->event_handler(&event, ibcq->cq_context); - } -} - -static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, - struct hns_roce_mtt *hr_mtt, - struct hns_roce_cq *hr_cq, int vector) +static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -101,35 +58,32 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, else mtt_table = &hr_dev->mr_table.mtt_table; - mtts = hns_roce_table_find(hr_dev, mtt_table, - hr_mtt->first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); - return -EINVAL; - } + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, + &dma_handle); - if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "CQ alloc.Invalid vector.\n"); + if (!mtts) { + dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } - hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret == -1) { - dev_err(dev, "CQ alloc.Failed to alloc index.\n"); - return -ENOMEM; + if (ret) { + dev_err(dev, "Num of CQ out of range.\n"); + return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + dev_err(dev, + "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc failed xa_store.\n"); + dev_err(dev, "Failed to xa_store CQ.\n"); goto err_put; } @@ -140,14 +94,16 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_xa; } - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, - nent, vector); + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ - ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, + HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + dev_err(dev, + "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_xa; } @@ -170,24 +126,17 @@ err_out: return ret; } -static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, + HNS_ROCE_CMD_DESTROY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) - dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); xa_erase(&cq_table->array, hr_cq->cqn); @@ -204,103 +153,91 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct ib_udata *udata, - struct hns_roce_cq_buf *buf, - struct ib_umem **umem, u64 buf_addr, int cqe) +static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { - int ret; - u32 page_shift; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; + struct ib_umem **umem = &hr_cq->umem; u32 npages; + int ret; - *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, - IB_ACCESS_LOCAL_WRITE, 1); + *umem = ib_umem_get(udata, ucmd.buf_addr, buf->size, + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; - - if (hr_dev->caps.cqe_buf_pg_sz) { - npages = (ib_umem_page_count(*umem) + - (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.cqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, - &buf->hr_mtt); - } else { - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - PAGE_SHIFT, &buf->hr_mtt); - } + mtt->mtt_type = MTT_TYPE_WQE; + + npages = DIV_ROUND_UP(ib_umem_page_count(*umem), + 1 << hr_dev->caps.cqe_buf_pg_sz); + ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: ib_umem_release(*umem); return ret; } -static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, u32 nent) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, &buf->hr_buf, - page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, + buf, buf->page_shift); if (ret) goto out; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; + mtt->mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, - buf->hr_buf.page_shift, &buf->hr_mtt); + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->size, buf); + out: return ret; } -static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, int cqe) +static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp, - int cq_entries) + struct hns_roce_ib_create_cq_resp *resp) { struct hns_roce_ib_create_cq ucmd; struct device *dev = hr_dev->dev; @@ -314,9 +251,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, - &hr_cq->umem, ucmd.buf_addr, - cq_entries); + ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -337,17 +272,16 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); return ret; } static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, int cq_entries) + struct hns_roce_cq *hr_cq) { struct device *dev = hr_dev->dev; - struct hns_roce_uar *uar; int ret; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { @@ -361,15 +295,14 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); + ret = alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; } - uar = &hr_dev->priv_uar; hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * uar->index; + DB_REG_OFFSET * hr_dev->priv_uar.index; return 0; @@ -392,64 +325,69 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(*resp))) hns_roce_db_unmap_user(context, &hr_cq->db); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); } static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); - struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct device *dev = hr_dev->dev; int vector = attr->comp_vector; - int cq_entries = attr->cqe; + u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + dev_err(dev, "Create CQ failed. entries=%d, max=%d\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } - if (hr_dev->caps.min_cqes) - cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", + vector, hr_dev->caps.num_comp_vectors); + return -EINVAL; + } - cq_entries = roundup_pow_of_two((unsigned int)cq_entries); - hr_cq->ib_cq.cqe = cq_entries - 1; + cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + cq_entries = roundup_pow_of_two(cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ + hr_cq->cq_depth = cq_entries; + hr_cq->vector = vector; + hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries); + ret = create_user_cq(hr_dev, hr_cq, udata, &resp); if (ret) { dev_err(dev, "Create cq failed in user mode!\n"); goto err_cq; } } else { - ret = create_kernel_cq(hr_dev, hr_cq, cq_entries); + ret = create_kernel_cq(hr_dev, hr_cq); if (ret) { dev_err(dev, "Create cq failed in kernel mode!\n"); goto err_cq; } } - /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, - hr_cq, vector); + ret = hns_roce_alloc_cqc(hr_dev, hr_cq); if (ret) { - dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; } @@ -462,11 +400,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; - /* Get created cq handler and carry out event */ - hr_cq->comp = hns_roce_ib_cq_comp; - hr_cq->event = hns_roce_ib_cq_event; - hr_cq->cq_depth = cq_entries; - if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -477,7 +410,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return 0; err_cqc: - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); err_dbmap: if (udata) @@ -489,7 +422,7 @@ err_cq: return ret; } -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -499,8 +432,8 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) return; } - hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_free_cqc(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (udata) { @@ -512,7 +445,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } @@ -520,38 +453,57 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { - struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_cq *ibcq; - cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); - if (!cq) { - dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + cqn); return; } - ++cq->arm_sn; - cq->comp(cq); + ++hr_cq->arm_sn; + ibcq = &hr_cq->ib_cq; + if (ibcq->comp_handler) + ibcq->comp_handler(ibcq, ibcq->cq_context); } void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { - struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_event event; + struct ib_cq *ibcq; - cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + return; + } - if (!cq) { - dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + event_type, cqn); return; } - cq->event(cq, (enum hns_roce_event)event_type); + atomic_inc(&hr_cq->refcount); + + ibcq = &hr_cq->ib_cq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + event.event = IB_EVENT_CQ_ERR; + ibcq->event_handler(&event, ibcq->cq_context); + } - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); } int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index c00714c2f16a..10af6958ab69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 96d1302abde1..5617434cbfb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,7 +45,7 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) +#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 @@ -53,8 +53,6 @@ #define BA_BYTE_LEN 8 -#define BITS_PER_BYTE 8 - /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 @@ -426,7 +424,6 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; int wqe_cnt; /* WQE num */ - u32 max_post; int max_gs; int offset; int wqe_shift; /* WQE size */ @@ -451,6 +448,7 @@ struct hns_roce_buf { struct hns_roce_buf_list *page_list; int nbufs; u32 npages; + u32 size; int page_shift; }; @@ -482,22 +480,14 @@ struct hns_roce_db { int order; }; -struct hns_roce_cq_buf { - struct hns_roce_buf hr_buf; - struct hns_roce_mtt hr_mtt; -}; - struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_cq_buf hr_buf; + struct hns_roce_buf buf; + struct hns_roce_mtt mtt; struct hns_roce_db db; u8 db_en; spinlock_t lock; struct ib_umem *umem; - void (*comp)(struct hns_roce_cq *cq); - void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - - struct hns_roce_uar *uar; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -521,9 +511,8 @@ struct hns_roce_idx_que { struct hns_roce_srq { struct ib_srq ibsrq; - void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); unsigned long srqn; - int max; + u32 wqe_cnt; int max_gs; int wqe_shift; void __iomem *db_reg_l; @@ -539,8 +528,8 @@ struct hns_roce_srq { spinlock_t lock; int head; int tail; - u16 wqe_ctr; struct mutex mutex; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; struct hns_roce_uar_table { @@ -582,7 +571,7 @@ struct hns_roce_av { u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - u16 vlan; + u16 vlan_id; bool vlan_en; }; @@ -695,10 +684,6 @@ struct hns_roce_qp { struct hns_roce_rinl_buf rq_inl_buf; }; -struct hns_roce_sqp { - struct hns_roce_qp hr_qp; -}; - struct hns_roce_ib_iboe { spinlock_t lock; struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; @@ -821,8 +806,8 @@ struct hns_roce_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int num_cqs; - int max_cqes; - int min_cqes; + u32 max_cqes; + u32 min_cqes; u32 min_wqes; int reserved_cqs; int reserved_srqs; @@ -953,7 +938,7 @@ struct hns_roce_hw { int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, - dma_addr_t dma_handle, int nent, u32 vector); + dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, @@ -1092,11 +1077,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } -static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) -{ - return container_of(hr_qp, struct hns_roce_sqp, hr_qp); -} - static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); @@ -1198,9 +1178,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, @@ -1257,12 +1237,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 86783276fb1f..3bb8f78fb7b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -59,7 +59,7 @@ enum { #define HNS_ROCE_HEM_CHUNK_LEN \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ - (sizeof(struct scatterlist))) + (sizeof(struct scatterlist) + sizeof(void *))) #define check_whether_bt_num_3(type, hop_num) \ (type < HEM_TYPE_MTT && hop_num == 2) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f74bf55f471..2a2b2112f886 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -732,7 +732,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) if (!cq) return -ENOMEM; - ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL); + ret = hns_roce_create_cq(cq, &cq_init_attr, NULL); if (ret) { dev_err(dev, "Create cq for reserved loop qp failed!"); goto alloc_cq_failed; @@ -868,7 +868,7 @@ alloc_pd_failed: kfree(pd); alloc_mem_failed: - hns_roce_ib_destroy_cq(cq, NULL); + hns_roce_destroy_cq(cq, NULL); alloc_cq_failed: kfree(cq); return ret; @@ -897,7 +897,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) i, ret); } - hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); + hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); kfree(&free_mr->mr_free_cq->ib_cq); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); kfree(&free_mr->mr_free_pd->ibpd); @@ -1114,9 +1114,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, free_mr = &priv->free_mr; if (mr->enabled) { - if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1))) - dev_warn(dev, "HW2SW_MPT failed!\n"); + if (hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "DESTROY_MPT failed!\n"); } mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); @@ -1979,8 +1980,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -1989,7 +1989,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; + !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL; } static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) @@ -2072,8 +2072,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_cq_context *cq_context = NULL; struct hns_roce_buf_list *tptr_buf; @@ -2108,9 +2107,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, - ilog2((unsigned int)nent)); + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, - CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector); cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); @@ -3644,10 +3643,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); + kfree(hr_qp); return 0; } @@ -3658,10 +3654,9 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct device *dev = &hr_dev->pdev->dev; u32 cqe_cnt_ori; u32 cqe_cnt_cur; - u32 cq_buf_size; int wait_time = 0; - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); /* * Before freeing cq buffer, we need to ensure that the outstanding CQE @@ -3686,13 +3681,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) wait_time++; } - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; - hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..cb8071a3e0d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - le16_to_cpu(ah->av.vlan)); + ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, @@ -2447,8 +2447,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2457,7 +2456,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL; + !!(n & hr_cq->cq_depth)) ? cqe : NULL; } static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) @@ -2550,8 +2549,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_v2_cq_context *cq_context; @@ -2563,9 +2561,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M, V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, - V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); + V2_CQC_BYTE_4_SHIFT_S, + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S, vector); + V2_CQC_BYTE_4_CEQN_S, hr_cq->vector); roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); @@ -4061,8 +4060,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; + u16 vlan_id = 0xffff; bool is_udp = false; - u16 vlan = 0xffff; u8 ib_port; u8 hr_port; int ret; @@ -4074,7 +4073,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; @@ -4083,7 +4082,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4095,7 +4094,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); + V2_QPC_BYTE_24_VLAN_ID_S, vlan_id); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0); @@ -4650,16 +4649,14 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) { + if (ret) ibdev_err(ibdev, "modify QP to Reset failed.\n"); - return ret; - } } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); @@ -4715,7 +4712,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, kfree(hr_qp->rq_inl_buf.wqe_list); } - return 0; + return ret; } static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) @@ -4725,16 +4722,11 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int ret; ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - return ret; - } - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + kfree(hr_qp); return 0; } @@ -4951,10 +4943,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { struct hns_roce_dev *hr_dev = eq->hr_dev; - __le32 doorbell[2]; - - doorbell[0] = 0; - doorbell[1] = 0; + __le32 doorbell[2] = {}; if (eq->type_flag == HNS_ROCE_AEQ) { roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M, @@ -5389,9 +5378,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, return; } - if (eq->buf_list) - dma_free_coherent(hr_dev->dev, buf_chk_sz, - eq->buf_list->buf, eq->buf_list->map); + dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, + eq->buf_list->map); + kfree(eq->buf_list); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, @@ -6047,7 +6036,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, hr_dev->caps.srqwqe_hop_num)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->max)); + ilog2(srq->wqe_cnt)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, SRQC_BYTE_4_SRQN_S, srq->srqn); @@ -6092,11 +6081,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); srq_context->idx_nxt_blk_addr = cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); @@ -6133,7 +6122,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, int ret; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->max) + if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -6193,7 +6182,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->max - 1; + attr->max_wr = srq->wqe_cnt - 1; attr->max_sge = srq->max_gs; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); @@ -6246,7 +6235,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->max - 1); + ind = srq->head & (srq->wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->max_gs)) { @@ -6261,7 +6250,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->max); + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); if (wqe_idx < 0) { ret = -ENOMEM; *bad_wr = wr; @@ -6285,7 +6274,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->max - 1); + ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { @@ -6380,12 +6369,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); -static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, +static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; int i; + hr_dev->pci_dev = handle->pdev; + hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; @@ -6410,8 +6401,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); priv->handle = handle; - - return 0; } static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) @@ -6429,14 +6418,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_kzalloc; } - hr_dev->pci_dev = handle->pdev; - hr_dev->dev = &handle->pdev->dev; - - ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); - if (ret) { - dev_err(hr_dev->dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } + hns_roce_hw_v2_get_cfg(hr_dev, handle); ret = hns_roce_init(hr_dev); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 43219d2f7de0..76a14db7028d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5d196c119ee..854ef6e74788 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "port(%d) can't find netdev\n", port); + dev_err(dev, "Can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -253,7 +253,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "find netdev %d failed!\r\n", port); + dev_err(dev, "Find netdev %u failed!\n", port); return -EINVAL; } @@ -301,12 +301,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } -static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -359,7 +353,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); /* vm_pgoff: 1 -- TPTR */ case 1: @@ -372,7 +367,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size, - vma->vm_page_prot); + vma->vm_page_prot, + NULL); default: return -EINVAL; @@ -423,14 +419,14 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, - .create_cq = hns_roce_ib_create_cq, + .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, .dealloc_ucontext = hns_roce_dealloc_ucontext, .del_gid = hns_roce_del_gid, .dereg_mr = hns_roce_dereg_mr, .destroy_ah = hns_roce_destroy_ah, - .destroy_cq = hns_roce_ib_destroy_cq, + .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, @@ -438,7 +434,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, - .modify_port = hns_roce_modify_port, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, .query_device = hns_roce_query_device, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5f8416ba09a9..9ad19170c3f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_CREATE_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, } } spin_unlock(&buddy->lock); - return -1; + return -EINVAL; found: clear_bit(*seg, buddy->bits[o]); @@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, } ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret == -1) - return -1; + if (ret) + return ret; - if (hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1)) { + ret = hns_roce_table_get_range(hr_dev, table, *seg, + *seg + (1 << order) - 1); + if (ret) { hns_roce_buddy_free(buddy, *seg, order); - return -1; + return ret; } return 0; @@ -578,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret == -1) + if (ret) return -ENOMEM; mr->iova = iova; /* MR va starting addr */ @@ -707,10 +708,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, int ret; if (mr->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); } if (mr->size != ~0ULL) { @@ -763,10 +765,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); goto err_page; } @@ -1143,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1228,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; @@ -1308,9 +1310,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, if (ret) goto free_cmd_mbox; - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); mr->enabled = 0; @@ -1332,9 +1334,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, goto free_cmd_mbox; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); ib_umem_release(mr->umem); goto free_cmd_mbox; } @@ -1448,10 +1450,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mw->rkey) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret); hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mw->rkey)); @@ -1487,10 +1490,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret); + dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); goto err_page; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 912b89b4da34..780c780fdb22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) /* Using bitmap to manager UAR index */ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); - if (ret == -1) + if (ret) return -ENOMEM; if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bd78ff90d998..a6565b674801 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -318,7 +318,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, * hr_qp->rq.max_gs); } - cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; @@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride); /* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } @@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret; + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); return ret; } - hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); @@ -391,37 +393,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); @@ -591,24 +593,24 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset = size; - size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_wr = hr_qp->sq.wqe_cnt; cap->max_send_sge = hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -743,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, - hr_qp->buff_size, 0, 0); + hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); @@ -1017,7 +1019,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -1030,7 +1031,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, hr_qp); if (ret) { - ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n", + ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", hr_qp->qpn, ret); kfree(hr_qp); return ERR_PTR(ret); @@ -1047,11 +1048,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); - if (!hr_sqp) + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) return ERR_PTR(-ENOMEM); - hr_qp = &hr_sqp->hr_qp; hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; @@ -1066,7 +1066,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, hr_qp->ibqp.qp_num, hr_qp); if (ret) { ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_sqp); + kfree(hr_qp); return ERR_PTR(ret); } @@ -1289,7 +1289,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, u32 cur; cur = hr_wq->head - hr_wq->tail; - if (likely(cur + nreq < hr_wq->max_post)) + if (likely(cur + nreq < hr_wq->wqe_cnt)) return false; hr_cq = to_hr_cq(ib_cq); @@ -1297,7 +1297,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; spin_unlock(&hr_cq->lock); - return cur + nreq >= hr_wq->max_post; + return cur + nreq >= hr_wq->wqe_cnt; } int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9591457eb768..7113ebfdb4f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_CREATE_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, srq->mtt.first_seg, &dma_handle_wqe); if (!mtts_wqe) { - dev_err(hr_dev->dev, - "SRQ alloc.Failed to find srq buf addr.\n"); + dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); return -EINVAL; } @@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, &dma_handle_idx); if (!mtts_idx) { dev_err(hr_dev->dev, - "SRQ alloc.Failed to find idx que buf addr.\n"); + "Failed to find mtt for srq idx queue buf.\n"); return -EINVAL; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); - if (ret == -1) { - dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + if (ret) { + dev_err(hr_dev->dev, + "Failed to alloc a bit from srq bitmap.\n"); return -ENOMEM; } @@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, mtts_wqe, mtts_idx, dma_handle_wqe, dma_handle_idx); - ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) goto err_xa; @@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); if (ret) - dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); xa_erase(&srq_table->xa, srq->srqn); @@ -180,22 +180,23 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - u32 page_shift; - u32 npages; + struct hns_roce_buf *buf; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + buf = &srq->buf; + buf->npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->mtt); if (ret) goto err_user_buf; @@ -205,16 +206,19 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, /* config index queue BA */ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); + srq->idx_que.buf_size, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); ret = PTR_ERR(srq->idx_que.umem); goto err_user_srq_mtt; } - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - + buf = &srq->idx_que.idx_buf; + buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); goto err_user_idx_mtt; @@ -251,7 +255,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL); + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) return -ENOMEM; @@ -277,7 +281,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) return -ENOMEM; srq->head = 0; - srq->tail = srq->max - 1; + srq->tail = srq->wqe_cnt - 1; ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, &srq->mtt); @@ -308,7 +312,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) if (ret) goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { ret = -ENOMEM; goto err_kernel_idx_buf; @@ -354,7 +358,7 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, } int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *srq_init_attr, + struct ib_srq_init_attr *init_attr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); @@ -366,24 +370,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ - if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); - srq->max_gs = srq_init_attr->attr.max_sge; + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->max_gs = init_attr->attr.max_sge; - srq_desc_size = max(16, 16 * srq->max_gs); + srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); srq->wqe_shift = ilog2(srq_desc_size); - srq_buf_size = srq->max * srq_desc_size; + srq_buf_size = srq->wqe_cnt * srq_desc_size; srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; srq->mtt.mtt_type = MTT_TYPE_SRQWQE; srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; @@ -401,8 +405,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? - to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; @@ -449,7 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, &srq->buf); } ib_umem_release(srq->idx_que.umem); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 2d6a378e8560..bb78d3280acc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr); if (!dst || dst->error) { if (dst) { - dst_release(dst); i40iw_pr_err("ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return rc; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8056930bbe2c..86375947bc67 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(udata, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc); if (IS_ERR(region)) return (struct ib_mr *)region; @@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a7d238d312f0..306b21281fa2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, int n; *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 0f390351cef0..714f9df5bf39 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 57079110af9b..abe68708d6d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } mutex_unlock(&dev->counters_table[port_num - 1].mutex); if (stats_avail) { - memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: edit_counter(&counter_stats, @@ -984,38 +983,31 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA * queries, should be called only by VFs and for that specific purpose */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || - in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) + return iboe_process_mad(ibdev, mad_flags, port_num, + in_wc, in_grh, in, out); - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in, out); } if (link == IB_LINK_LAYER_ETHERNET) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + in_grh, in, out); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..0b5dc1d5928f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -256,6 +256,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int hw_update = 0; int i; struct gid_entry *gids = NULL; + u16 vlan_id = 0xffff; + u8 mac[ETH_ALEN]; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -266,12 +268,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!context) return -EINVAL; + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { if (!memcmp(&port_gid_table->gids[i].gid, &attr->gid, sizeof(attr->gid)) && - port_gid_table->gids[i].gid_type == attr->gid_type) { + port_gid_table->gids[i].gid_type == attr->gid_type && + port_gid_table->gids[i].vlan_id == vlan_id) { found = i; break; } @@ -291,6 +297,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) memcpy(&port_gid_table->gids[free].gid, &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; + port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; @@ -1146,7 +1153,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return rdma_user_mmap_io(context, vma, to_mucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case 1: if (dev->dev->caps.bf_reg_size == 0) @@ -1155,7 +1163,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) context, vma, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, - PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); case 3: { struct mlx4_clock_params params; @@ -1171,7 +1180,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) params.bar) + params.offset) >> PAGE_SHIFT, - PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } default: diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..d188573187fa 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -508,6 +508,7 @@ struct gid_entry { union ib_gid gid; enum ib_gid_type gid_type; struct gid_cache_context *ctx; + u16 vlan_id; }; struct mlx4_port_gid_table { @@ -786,11 +787,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..dfa17bcdcdbc 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, up_read(¤t->mm->mmap_sem); } - return ib_umem_get(udata, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags); } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd4aa04416c6..85f57b76e446 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); - qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (err) goto err; - qp->umem = - ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 848db7264cc9..8dcf6e3d9ae2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 45f48cde6b9d..dd8d24ee8e1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; - struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -519,27 +516,29 @@ repoll: } } break; - case MLX5_CQE_SIG_ERR: - sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + case MLX5_CQE_SIG_ERR: { + struct mlx5_sig_err_cqe *sig_err_cqe = + (struct mlx5_sig_err_cqe *)cqe64; + struct mlx5_core_sig_ctx *sig; - xa_lock(&dev->mdev->priv.mkey_table); - mmkey = xa_load(&dev->mdev->priv.mkey_table, + xa_lock(&dev->sig_mrs); + sig = xa_load(&dev->sig_mrs, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - mr = to_mibmr(mmkey); - get_sig_err_item(sig_err_cqe, &mr->sig->err_item); - mr->sig->sig_err_exists = true; - mr->sig->sigerr_count++; + get_sig_err_item(sig_err_cqe, &sig->err_item); + sig->sig_err_exists = true; + sig->sigerr_count++; mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", - cq->mcq.cqn, mr->sig->err_item.key, - mr->sig->err_item.err_type, - mr->sig->err_item.sig_err_offset, - mr->sig->err_item.expected, - mr->sig->err_item.actual); + cq->mcq.cqn, sig->err_item.key, + sig->err_item.err_type, + sig->err_item.sig_err_offset, + sig->err_item.expected, + sig->err_item.actual); - xa_unlock(&dev->mdev->priv.mkey_table); + xa_unlock(&dev->sig_mrs); goto repoll; } + } return 0; } @@ -710,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->buf.umem = ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1111,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 59022b744144..9d0a18cf9e5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -100,6 +100,7 @@ struct devx_obj { struct mlx5_ib_devx_mr devx_mr; struct mlx5_core_dct core_dct; struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; }; struct list_head event_sub; /* holds devx_event_subscription entries */ }; @@ -192,15 +193,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); + *counter_id += offset; return true; } @@ -1265,8 +1271,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - return xa_err(xa_store(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1298,29 +1304,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1362,8 +1345,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. + */ + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->odp_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1382,12 +1373,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1484,6 +1469,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (err) goto obj_free; + if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { + u8 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = 128UL * bulk; + } + uobj->object = obj; INIT_LIST_HEAD(&obj->event_sub); obj->ib_dev = dev; @@ -1491,26 +1483,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); - if (err) - goto obj_destroy; - } - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto err_copy; + goto obj_destroy; if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -2147,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 8f4e5f22b84c..12737c509aa2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b198ff10cde9..dbee17d22d50 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -85,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; + u32 *offset_attr; + u32 offset = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -151,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( if (len) { devx_obj = arr_flow_actions[0]->object; - if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, + &counter_id)) return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -598,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..ac4d8d1b9a07 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 74ce9249e75a..5c3d052ac30b 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -35,7 +35,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) int vport_index; if (rep->vport == MLX5_VPORT_UPLINK) - profile = &uplink_rep_profile; + profile = &raw_eth_profile; else return mlx5_ib_set_vport_rep(dev, rep); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index de43b423bafc..3b6750cba796 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -10,7 +10,7 @@ #include "mlx5_ib.h" #ifdef CONFIG_MLX5_ESWITCH -extern const struct mlx5_ib_profile uplink_rep_profile; +extern const struct mlx5_ib_profile raw_eth_profile; u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 649a3364f838..4f0edd4832bd 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -201,3 +201,27 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, return -EINVAL; } + +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); + if (err) + goto ex; + + port_guid->guid = rep->port_guid; + node_guid->guid = rep->node_guid; +ex: + kfree(rep); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 348c1df69cdc..14e0c17de6a9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, port); } -static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - u16 slid; - int err; - - slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) - return IB_MAD_RESULT_SUCCESS; - - /* Don't process SMInfo queries -- the SMA can't handle them. - */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) - return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) - return IB_MAD_RESULT_SUCCESS; - } else { - return IB_MAD_RESULT_SUCCESS; - } - - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); - if (err) - return IB_MAD_RESULT_FAILURE; - - /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) - /* no response for trap repress */ - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, void *out) { @@ -271,30 +219,66 @@ done: int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - int ret; + u8 mgmt_class = in->mad_hdr.mgmt_class; + u8 method = in->mad_hdr.method; + u16 slid; + int err; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : + be16_to_cpu(IB_LID_PERMISSIVE); - memset(out_mad->data, 0, sizeof(out_mad->data)); + if (method == IB_MGMT_METHOD_TRAP && !slid) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(dev, port_num, in_mad, out_mad); - } else { - ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, - in_mad, out_mad); + switch (mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } break; + case IB_MGMT_CLASS_PERF_MGMT: + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && + method == IB_MGMT_METHOD_GET) + return process_pma_cmd(dev, port_num, in, out); + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS1: + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS2: + case IB_MGMT_CLASS_CONG_MGMT: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } break; + default: + return IB_MAD_RESULT_SUCCESS; } - return ret; + + err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..51100350b688 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem_odp.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> @@ -693,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, get_atomic_caps(dev, atomic_size_qp, props); } -static void get_atomic_caps_dc(struct mlx5_ib_dev *dev, - struct ib_device_attr *props) -{ - u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); - - get_atomic_caps(dev, atomic_size_qp, props); -} - -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev) -{ - struct ib_device_attr props = {}; - - get_atomic_caps_dc(dev, &props); - return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false; -} static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -844,8 +830,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; - else - resp.response_length = resp_len; + + resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; @@ -1011,6 +997,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); props->max_pi_fast_reg_page_list_len = props->max_fast_reg_page_list_len / 2; + props->max_sgl_rd = + MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -1031,7 +1019,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; - if (!mlx5_core_is_pf(mdev)) + if (mlx5_core_is_vf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == @@ -1161,8 +1149,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; resp.striding_rq_caps.max_single_stride_log_num_of_bytes = MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; - resp.striding_rq_caps.min_single_wqe_log_num_of_strides = - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range)) + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + else + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.max_single_wqe_log_num_of_strides = MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.supported_qpts = @@ -1808,7 +1802,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + if (dev->wc_support) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); @@ -2168,7 +2162,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - prot); + prot, NULL); if (err) { mlx5_ib_err(dev, "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", @@ -2210,7 +2204,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SHIFT) + page_idx; return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2248,7 +2243,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); @@ -5145,8 +5141,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); - if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) - immutable->max_mad_size = IB_MGMT_MAD_SIZE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } @@ -5249,11 +5244,9 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - if (MLX5_CAP_GEN(dev->mdev, roce)) { - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; - } + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; err = mlx5_eth_lag_init(dev); if (err) @@ -5262,8 +5255,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - if (MLX5_CAP_GEN(dev->mdev, roce)) - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -5271,8 +5263,7 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - if (MLX5_CAP_GEN(dev->mdev, roce)) - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_nic_vport_disable_roce(dev->mdev); } struct mlx5_ib_counter { @@ -5710,11 +5701,10 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) { - if (!dev->delay_drop.dbg) + if (!dev->delay_drop.dir_debugfs) return; - debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs); - kfree(dev->delay_drop.dbg); - dev->delay_drop.dbg = NULL; + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static void cancel_delay_drop(struct mlx5_ib_dev *dev) @@ -5765,52 +5755,22 @@ static const struct file_operations fops_delay_drop_timeout = { .read = delay_drop_timeout_read, }; -static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) +static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev) { - struct mlx5_ib_dbg_delay_drop *dbg; + struct dentry *root; if (!mlx5_debugfs_root) - return 0; - - dbg = kzalloc(sizeof(*dbg), GFP_KERNEL); - if (!dbg) - return -ENOMEM; - - dev->delay_drop.dbg = dbg; - - dbg->dir_debugfs = - debugfs_create_dir("delay_drop", - dev->mdev->priv.dbg_root); - if (!dbg->dir_debugfs) - goto out_debugfs; - - dbg->events_cnt_debugfs = - debugfs_create_atomic_t("num_timeout_events", 0400, - dbg->dir_debugfs, - &dev->delay_drop.events_cnt); - if (!dbg->events_cnt_debugfs) - goto out_debugfs; - - dbg->rqs_cnt_debugfs = - debugfs_create_atomic_t("num_rqs", 0400, - dbg->dir_debugfs, - &dev->delay_drop.rqs_cnt); - if (!dbg->rqs_cnt_debugfs) - goto out_debugfs; - - dbg->timeout_debugfs = - debugfs_create_file("timeout", 0600, - dbg->dir_debugfs, - &dev->delay_drop, - &fops_delay_drop_timeout); - if (!dbg->timeout_debugfs) - goto out_debugfs; + return; - return 0; + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + dev->delay_drop.dir_debugfs = root; -out_debugfs: - delay_drop_debugfs_cleanup(dev); - return -ENOMEM; + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); } static void init_delay_drop(struct mlx5_ib_dev *dev) @@ -5826,8 +5786,7 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) atomic_set(&dev->delay_drop.rqs_cnt, 0); atomic_set(&dev->delay_drop.events_cnt, 0); - if (delay_drop_debugfs_init(dev)) - mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); + delay_drop_debugfs_init(dev); } static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, @@ -6145,11 +6104,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - srcu_barrier(&dev->mr_srcu); - cleanup_srcu_struct(&dev->mr_srcu); - } + WARN_ON(!xa_empty(&dev->odp_mkeys)); + cleanup_srcu_struct(&dev->odp_srcu); + WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -6201,15 +6159,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->odp_mkeys); + xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_mp; - } + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; return 0; @@ -6269,6 +6227,9 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .enable_driver = mlx5_ib_enable_driver, + .fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -6315,6 +6276,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_guid = mlx5_ib_get_vf_guid, .get_vf_stats = mlx5_ib_get_vf_stats, .set_vf_guid = mlx5_ib_set_vf_guid, .set_vf_link_state = mlx5_ib_set_vf_link_state, @@ -6444,7 +6406,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .query_port = mlx5_ib_rep_query_port, }; -static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; @@ -6484,7 +6446,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) mlx5_remove_netdev_notifier(dev, port_num); } -static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; @@ -6500,7 +6462,7 @@ static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) return err; } -static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_stage_common_roce_cleanup(dev); } @@ -6710,6 +6672,18 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) } } +int mlx5_ib_enable_driver(struct ib_device *dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; + + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); + + return ret; +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6807,7 +6781,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_delay_drop_cleanup), }; -const struct mlx5_ib_profile uplink_rep_profile = { +const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -6818,11 +6792,11 @@ const struct mlx5_ib_profile uplink_rep_profile = { mlx5_ib_stage_caps_init, NULL), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, - mlx5_ib_stage_rep_non_default_cb, + mlx5_ib_stage_raw_eth_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_rep_roce_init, - mlx5_ib_stage_rep_roce_cleanup), + mlx5_ib_stage_raw_eth_roce_init, + mlx5_ib_stage_raw_eth_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SRQ, mlx5_init_srq_table, mlx5_cleanup_srq_table), @@ -6898,6 +6872,7 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { + const struct mlx5_ib_profile *profile; enum rdma_link_layer ll; struct mlx5_ib_dev *dev; int port_type_cap; @@ -6933,7 +6908,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; dev->num_ports = num_ports; - return __mlx5_ib_add(dev, &pf_profile); + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev)) + profile = &raw_eth_profile; + else + profile = &pf_profile; + + return __mlx5_ib_add(dev, profile); } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index b5aece786b36..048f4e974a61 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -34,6 +34,7 @@ #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include "mlx5_ib.h" +#include <linux/jiffies.h> /* @umem: umem object to scan * @addr: ib virtual address requested by the user @@ -216,3 +217,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) *offset = buf_off >> ilog2(off_size); return 0; } + +#define WR_ID_BF 0xBF +#define WR_ID_END 0xBAD +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) +static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, + bool signaled) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_bf *bf = &qp->bf; + __be32 mmio_wqe[16] = {}; + unsigned long flags; + unsigned int idx; + int i; + + if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + + memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); + ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0; + ctrl->opmod_idx_opcode = + cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | + (qp->trans_qp.base.mqp.qpn << 8)); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; + qp->sq.wqe_head[idx] = qp->sq.head + 1; + qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), + MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + qp->sq.head++; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell + */ + wmb(); + for (i = 0; i < 8; i++) + mlx5_write64(&mmio_wqe[i * 2], + bf->bfreg->map + bf->offset + i * 8); + + bf->offset ^= bf->buf_size; + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return 0; +} + +static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) +{ + int ret; + struct ib_wc wc = {}; + unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + + do { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0 || wc.status) + return ret < 0 ? ret : -EINVAL; + if (ret) + break; + } while (!time_after(jiffies, end)); + + if (!ret) + return -ETIMEDOUT; + + if (wc.wr_id != WR_ID_BF) + ret = 0; + + return ret; +} + +static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + int err, i; + + for (i = 0; i < TEST_WC_NUM_WQES; i++) { + err = post_send_nop(dev, qp, WR_ID_BF, false); + if (err) + return err; + } + + return post_send_nop(dev, qp, WR_ID_END, true); +} + +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) +{ + struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + struct ib_qp_init_attr qp_init_attr = { + .cap = { .max_send_wr = TEST_WC_NUM_WQES }, + .qp_type = IB_QPT_UD, + .sq_sig_type = IB_SIGNAL_REQ_WR, + .create_flags = MLX5_IB_QP_CREATE_WC_TEST, + }; + struct ib_qp_attr qp_attr = { .port_num = 1 }; + struct ib_device *ibdev = &dev->ib_dev; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_pd *pd; + int ret; + + if (!MLX5_CAP_GEN(dev->mdev, bf)) + return 0; + + if (!dev->mdev->roce.roce_en && + port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { + if (mlx5_core_is_pf(dev->mdev)) + dev->wc_support = true; + return 0; + } + + ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); + if (ret) + goto print_err; + + if (!dev->wc_bfreg.wc) + goto out1; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto out1; + } + + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out2; + } + + qp_init_attr.recv_cq = cq; + qp_init_attr.send_cq = cq; + qp = ib_create_qp(pd, &qp_init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out3; + } + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_modify_qp(qp, &qp_attr, + IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | + IB_QP_QKEY); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) + goto out4; + + ret = test_wc_do_send(dev, qp); + if (ret < 0) + goto out4; + + ret = test_wc_poll_cq_result(dev, cq); + if (ret > 0) { + dev->wc_support = true; + ret = 0; + } + +out4: + ib_destroy_qp(qp); +out3: + ib_destroy_cq(cq); +out2: + ib_dealloc_pd(pd); +out1: + mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); +print_err: + if (ret) + mlx5_ib_err( + dev, + "Error %d while trying to test write-combining support\n", + ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..5986953ec2fa 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,12 +247,8 @@ struct mlx5_ib_flow_db { * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START +#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -295,6 +291,7 @@ enum mlx5_ib_wq_flags { #define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 +#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 struct mlx5_ib_rwq { struct ib_wq ibwq; @@ -585,6 +582,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -606,7 +606,6 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -618,10 +617,18 @@ struct mlx5_ib_mr { u64 data_iova; u64 pi_iova; - atomic_t num_leaf_free; - wait_queue_head_t q_leaf_free; + /* For ODP and implicit */ + atomic_t num_deferred_work; + struct xarray implicit_children; + union { + struct rcu_head rcu; + struct list_head elm; + struct work_struct work; + } odp_destroy; + struct ib_odp_counters odp_stats; + bool is_odp_implicit; + struct mlx5_async_work cb_work; - atomic_t num_pending_prefetch; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -639,7 +646,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { @@ -793,13 +799,6 @@ enum { MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, }; -struct mlx5_ib_dbg_delay_drop { - struct dentry *dir_debugfs; - struct dentry *rqs_cnt_debugfs; - struct dentry *events_cnt_debugfs; - struct dentry *timeout_debugfs; -}; - struct mlx5_ib_delay_drop { struct mlx5_ib_dev *dev; struct work_struct delay_drop_work; @@ -809,7 +808,7 @@ struct mlx5_ib_delay_drop { bool activate; atomic_t events_cnt; atomic_t rqs_cnt; - struct mlx5_ib_dbg_delay_drop *dbg; + struct dentry *dir_debugfs; }; enum mlx5_ib_stages { @@ -958,7 +957,11 @@ struct mlx5_ib_dev { /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 fill_delay:1; + u8 is_rep:1; + u8 lag_active:1; + u8 wc_support:1; struct umr_common umrc; /* sync used page count stats */ @@ -967,7 +970,6 @@ struct mlx5_ib_dev { struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; - int fill_delay; struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -976,7 +978,9 @@ struct mlx5_ib_dev { * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. */ - struct srcu_struct mr_srcu; + struct srcu_struct odp_srcu; + struct xarray odp_mkeys; + u32 null_mkey; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -985,11 +989,10 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - bool is_rep; - int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; @@ -1000,6 +1003,8 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + + struct xarray sig_mrs; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1163,6 +1168,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1180,9 +1186,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); @@ -1224,6 +1229,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, @@ -1236,7 +1243,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, @@ -1252,8 +1258,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, - unsigned long end); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1283,11 +1287,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, - unsigned long start, - unsigned long end){}; #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +extern const struct mmu_interval_notifier_ops mlx5_mn_ops; + /* Needed for rep profile */ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, @@ -1301,6 +1304,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); @@ -1335,6 +1341,10 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); @@ -1350,7 +1360,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else @@ -1492,4 +1502,7 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return true; } + +int mlx5_ib_enable_driver(struct ib_device *dev); +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..ea8bfc3e2d8d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,7 +50,6 @@ enum { static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -59,13 +58,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); - - return err; + return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } static int order2idx(struct mlx5_ib_dev *dev, int order) @@ -84,32 +79,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -120,8 +89,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct xarray *mkeys = &dev->mdev->priv.mkey_table; - int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -148,13 +115,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - xa_lock_irqsave(mkeys, flags); - err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC)); - xa_unlock_irqrestore(mkeys, flags); - if (err) - pr_err("Error inserting to mkey tree. 0x%x\n", -err); - if (!completion_done(&ent->compl)) complete(&ent->compl); } @@ -244,9 +204,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - synchronize_srcu(&dev->mr_srcu); - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -454,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; @@ -537,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) c = order2idx(dev, mr->order); WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (unreg_umr(dev, mr)) { + if (mlx5_mr_cache_invalidate(mr)) { mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; @@ -581,10 +538,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -705,6 +658,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -728,16 +695,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, length64, 1); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, 0); + set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -790,7 +749,8 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (access_flags & IB_ACCESS_ON_DEMAND) { struct ib_umem_odp *odp; - odp = ib_umem_odp_get(udata, start, length, access_flags); + odp = ib_umem_odp_get(udata, start, length, access_flags, + &mlx5_mn_ops); if (IS_ERR(odp)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(odp)); @@ -805,7 +765,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); @@ -1195,16 +1155,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); + set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1346,8 +1298,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1363,9 +1313,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; - atomic_set(&mr->num_pending_prefetch, 0); + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, + GFP_KERNEL)); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } return &mr->ibmr; @@ -1374,22 +1331,29 @@ error: return ERR_PTR(err); } -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +/** + * mlx5_mr_cache_invalidate - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA inprogress will be completed. Failure of this function + * indicates the HW has failed catastrophically. + */ +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_umr_wr umrwr = {}; - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = dev->umrc.pd; + umrwr.pd = mr->dev->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(dev, &umrwr); + return mlx5_ib_post_send_wait(mr->dev, &umrwr); } static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, @@ -1441,6 +1405,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1470,7 +1437,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->allocated_from_cache) - err = unreg_umr(dev, mr); + err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); if (err) @@ -1486,8 +1453,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1516,7 +1481,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1586,6 +1550,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); + xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); kfree(mr->sig); mr->sig = NULL; } @@ -1601,53 +1566,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; - if (is_odp_mr(mr)) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - - /* Prevent new page faults and - * prefetch requests from succeeding - */ - mr->live = 0; - - /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_pending_prefetch)) - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_pending_prefetch)); - - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); - /* Destroy all page mappings */ - if (!umem_odp->is_implicit_odp) - mlx5_ib_invalidate_range(umem_odp, - ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - else - mlx5_ib_free_implicit_mr(mr); - /* - * We kill the umem before the MR for ODP, - * so that there will not be any invalidations in - * flight, looking at the *mr struct. - */ - ib_umem_odp_release(umem_odp); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - - /* Avoid double-freeing the umem. */ - umem = NULL; - } + /* Stop all DMA */ + if (is_odp_mr(mr)) + mlx5_ib_fence_odp_mr(mr); + else + clean_mr(dev, mr); - clean_mr(dev, mr); + if (mr->allocated_from_cache) + mlx5_mr_cache_free(dev, mr); + else + kfree(mr); - /* - * We should unregister the DMA address from the HCA before - * remove the DMA mapping. - */ - mlx5_mr_cache_free(dev, mr); ib_umem_release(umem); - if (umem) - atomic_sub(npages, &dev->mdev->priv.reg_pages); + atomic_sub(npages, &dev->mdev->priv.reg_pages); - if (!mr->allocated_from_cache) - kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1659,6 +1591,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); } + if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { + mlx5_ib_free_implicit_mr(mmr); + return 0; + } + dereg_mr(to_mdev(ibmr->device), mmr); return 0; @@ -1822,8 +1759,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, if (err) goto err_free_mtt_mr; + err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), + mr->sig, GFP_KERNEL)); + if (err) + goto err_free_descs; return 0; +err_free_descs: + destroy_mkey(dev, mr); + mlx5_free_priv_descs(mr); err_free_mtt_mr: dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); mr->mtt_mr = NULL; @@ -1976,9 +1920,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, + GFP_KERNEL)); + if (err) + goto free_mkey; + } + kfree(in); return &mw->ibmw; +free_mkey: + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: kfree(mw); kfree(in); @@ -1987,14 +1941,24 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->odp_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..f924250f80c2 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,152 +93,185 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -static int check_parent(struct ib_umem_odp *odp, - struct mlx5_ib_mr *parent) +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { - struct mlx5_ib_mr *mr = odp->private; - - return mr && mr->parent == parent && !odp->dying; -} - -static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) -{ - if (WARN_ON(!mr || !is_odp_mr(mr))) - return NULL; - - return to_ib_umem_odp(mr->umem)->per_mm; -} - -static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) -{ - struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext_per_mm *per_mm = odp->per_mm; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - while (1) { - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (check_parent(odp, parent)) - goto end; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -static struct ib_umem_odp *odp_lookup(u64 start, u64 length, - struct mlx5_ib_mr *parent) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); - struct ib_umem_odp *odp; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); - if (!odp) - goto end; - - while (1) { - if (check_parent(odp, parent)) - goto end; - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp) > start + length) - goto not_found; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags) -{ - struct ib_pd *pd = mr->ibmr.pd; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem_odp *odp; - unsigned long va; - int i; + struct mlx5_klm *end = pklm + nentries; if (flags & MLX5_IB_UPD_XLT_ZAP) { - for (i = 0; i < nentries; i++, pklm++) { + for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } return; } - odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + /* + * The locking here is pretty subtle. Ideally the implicit_children + * xarray would be protected by the umem_mutex, however that is not + * possible. Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * xa_store() + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the xarray must be followed by the locked update_xlt + * before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the xa_store() visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the xarray. + */ + lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); - for (i = 0; i < nentries; i++, pklm++) { - pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && ib_umem_start(odp) == va) { - struct mlx5_ib_mr *mtt = odp->private; + for (; pklm != end; pklm++, idx++) { + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); + pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); + if (mtt) { pklm->key = cpu_to_be32(mtt->ibmr.lkey); - odp = odp_next(odp); + pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->va = 0; } - mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", - i, va, be32_to_cpu(pklm->key)); } } -static void mr_leaf_free_action(struct work_struct *work) +static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) { - struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - mr->parent = NULL; - synchronize_srcu(&mr->dev->mr_srcu); + /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */ + mutex_lock(&odp->umem_mutex); + if (odp->npages) { + mlx5_mr_cache_invalidate(mr); + ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp), + ib_umem_end(odp)); + WARN_ON(odp->npages); + } + odp->private = NULL; + mutex_unlock(&odp->umem_mutex); - ib_umem_odp_release(odp); - if (imr->live) - mlx5_ib_update_xlt(imr, idx, 1, 0, + if (!mr->allocated_from_cache) { + mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + +/* + * This must be called after the mr has been removed from implicit_children + * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * empty here, parallel page faults could have raced with the free process and + * added pages to it. + */ +static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) +{ + struct mlx5_ib_mr *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + int srcu_key; + + /* implicit_child_mr's are not allowed to have deferred work */ + WARN_ON(atomic_read(&mr->num_deferred_work)); + + if (need_imr_xlt) { + srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + mutex_lock(&odp_imr->umem_mutex); + mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + } + + dma_fence_odp_mr(mr); + + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); + ib_umem_odp_release(odp); + atomic_dec(&imr->num_deferred_work); +} - if (atomic_dec_and_test(&imr->num_leaf_free)) - wake_up(&imr->q_leaf_free); +static void free_implicit_child_mr_work(struct work_struct *work) +{ + struct mlx5_ib_mr *mr = + container_of(work, struct mlx5_ib_mr, odp_destroy.work); + + free_implicit_child_mr(mr, true); } -void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, - unsigned long end) +static void free_implicit_child_mr_rcu(struct rcu_head *head) { + struct mlx5_ib_mr *mr = + container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); + + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); +} + +static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *imr = mr->parent; + + xa_lock(&imr->implicit_children); + /* + * This can race with mlx5_ib_free_implicit_mr(), the first one to + * reach the xa lock wins the race and destroys the MR. + */ + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != + mr) + goto out_unlock; + + atomic_inc(&imr->num_deferred_work); + call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + free_implicit_child_mr_rcu); + +out_unlock: + xa_unlock(&imr->implicit_children); +} + +static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct ib_umem_odp *umem_odp = + container_of(mni, struct ib_umem_odp, notifier); struct mlx5_ib_mr *mr; const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; + unsigned long start; + unsigned long end; int in_block = 0; u64 addr; - if (!umem_odp) { - pr_err("invalidation called on NULL umem or non-ODP umem\n"); - return; - } + if (!mmu_notifier_range_blockable(range)) + return false; + mutex_lock(&umem_odp->umem_mutex); + mmu_interval_set_seq(mni, cur_seq); + /* + * If npages is zero then umem_odp->private may not be setup yet. This + * does not complete until after the first page is mapped for DMA. + */ + if (!umem_odp->npages) + goto out; mr = umem_odp->private; - if (!mr || !mr->ibmr.pd) - return; - - start = max_t(u64, ib_umem_start(umem_odp), start); - end = min_t(u64, ib_umem_end(umem_odp), end); + start = max_t(u64, ib_umem_start(umem_odp), range->start); + end = min_t(u64, ib_umem_end(umem_odp), range->end); /* * Iteration one - zap the HW's MTTs. The notifiers_count ensures that @@ -246,7 +279,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * overwrite the same MTTs. Concurent invalidations might race us, * but they will write 0s as well, so no difference in the end result. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* @@ -261,6 +293,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -278,7 +313,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -287,14 +324,17 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem_odp->npages && mr->parent && - !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); - atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem_odp->work); - } + if (unlikely(!umem_odp->npages && mr->parent)) + destroy_unused_implicit_child_mr(mr); +out: + mutex_unlock(&umem_odp->umem_mutex); + return true; } +const struct mmu_interval_notifier_ops mlx5_mn_ops = { + .invalidate = mlx5_ib_invalidate_range, +}; + void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { struct ib_odp_caps *caps = &dev->odp_caps; @@ -359,8 +399,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; - - return; } static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, @@ -385,252 +423,226 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, wq_num, err); } -static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem_odp *umem_odp, - bool ksm, int access_flags) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *ret; int err; - mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY : - MLX5_IMR_MTT_CACHE_ENTRY); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE, &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); if (IS_ERR(mr)) - return mr; - - mr->ibmr.pd = pd; - - mr->dev = dev; - mr->access_flags = access_flags; - mr->mmkey.iova = 0; - mr->umem = &umem_odp->umem; - - if (ksm) { - err = mlx5_ib_update_xlt(mr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); - - } else { - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); - } - - if (err) - goto fail; + goto out_umem; + mr->ibmr.pd = imr->ibmr.pd; + mr->access_flags = imr->access_flags; + mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - - mr->live = 1; - - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", - mr->mmkey.key, dev->mdev, mr); - - return mr; - -fail: - mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); - mlx5_mr_cache_free(dev, mr); - - return ERR_PTR(err); -} - -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt) -{ - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); - struct ib_umem_odp *odp, *result = NULL; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); - u64 addr = io_virt & MLX5_IMR_MTT_MASK; - int nentries = 0, start_idx = 0, ret; - struct mlx5_ib_mr *mtt; - - mutex_lock(&odp_mr->umem_mutex); - odp = odp_lookup(addr, 1, mr); - - mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", - io_virt, bcnt, addr, odp); - -next_mr: - if (likely(odp)) { - if (nentries) - nentries++; - } else { - odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) { - mutex_unlock(&odp_mr->umem_mutex); - return ERR_CAST(odp); - } - - mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, - mr->access_flags); - if (IS_ERR(mtt)) { - mutex_unlock(&odp_mr->umem_mutex); - ib_umem_odp_release(odp); - return ERR_CAST(mtt); - } - - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = addr; - mtt->parent = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); - - if (!nentries) - start_idx = addr >> MLX5_IMR_MTT_SHIFT; - nentries++; - } - - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = odp; - - addr += MLX5_IMR_MTT_SIZE; - if (unlikely(addr < io_virt + bcnt)) { - odp = odp_next(odp); - if (odp && ib_umem_start(odp) != addr) - odp = NULL; - goto next_mr; + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mr->parent = imr; + odp->private = mr; + + err = mlx5_ib_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) { + ret = ERR_PTR(err); + goto out_mr; } - if (unlikely(nentries)) { - ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); - if (ret) { - mlx5_ib_err(dev, "Failed to update PAS\n"); - result = ERR_PTR(ret); + /* + * Once the store to either xarray completes any error unwind has to + * use synchronize_srcu(). Avoid this with xa_reserve() + */ + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); + if (unlikely(ret)) { + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto out_mr; } + /* + * Another thread beat us to creating the child mr, use + * theirs. + */ + goto out_mr; } - mutex_unlock(&odp_mr->umem_mutex); - return result; + mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + return mr; + +out_mr: + mlx5_mr_cache_free(imr->dev, mr); +out_umem: + ib_umem_odp_release(odp); + return ret; } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) { - struct mlx5_ib_mr *imr; + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *imr; + int err; umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return ERR_CAST(imr); + err = PTR_ERR(imr); + goto out_umem; } + imr->ibmr.pd = &pd->ibpd; + imr->access_flags = access_flags; + imr->mmkey.iova = 0; + imr->umem = &umem_odp->umem; + imr->ibmr.lkey = imr->mmkey.key; + imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; - init_waitqueue_head(&imr->q_leaf_free); - atomic_set(&imr->num_leaf_free, 0); - atomic_set(&imr->num_pending_prefetch, 0); + imr->is_odp_implicit = true; + atomic_set(&imr->num_deferred_work, 0); + xa_init(&imr->implicit_children); + + err = mlx5_ib_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) + goto out_mr; + + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_KERNEL)); + if (err) + goto out_mr; + mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr); return imr; +out_mr: + mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); + mlx5_mr_cache_free(dev, imr); +out_umem: + ib_umem_odp_release(umem_odp); + return ERR_PTR(err); } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - struct rb_node *node; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_dev *dev = imr->dev; + struct list_head destroy_list; + struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *tmp; + unsigned long idx; - down_read(&per_mm->umem_rwsem); - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - struct mlx5_ib_mr *mr = umem_odp->private; + INIT_LIST_HEAD(&destroy_list); - if (mr->parent != imr) - continue; + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); + /* + * This stops the SRCU protected page fault path from touching either + * the imr or any children. The page fault path can only reach the + * children xarray via the imr. + */ + synchronize_srcu(&dev->odp_srcu); - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + xa_lock(&imr->implicit_children); + xa_for_each (&imr->implicit_children, idx, mtt) { + __xa_erase(&imr->implicit_children, idx); + list_add(&mtt->odp_destroy.elm, &destroy_list); + } + xa_unlock(&imr->implicit_children); - if (umem_odp->dying) - continue; + /* + * num_deferred_work can only be incremented inside the odp_srcu, or + * under xa_lock while the child is in the xarray. Thus at this point + * it is only decreasing, and all work holding it is now on the wq. + */ + if (atomic_read(&imr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&imr->num_deferred_work)); + } + + /* + * Fence the imr before we destroy the children. This allows us to + * skip updating the XLT of the imr during destroy of the child mkey + * the imr points to. + */ + mlx5_mr_cache_invalidate(imr); + + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + free_implicit_child_mr(mtt, false); - WRITE_ONCE(umem_odp->dying, 1); - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); + mlx5_mr_cache_free(dev, imr); + ib_umem_odp_release(odp_imr); +} + +/** + * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&mr->dev->odp_srcu); + + if (atomic_read(&mr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_deferred_work)); } - up_read(&per_mm->umem_rwsem); - wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); + dma_fence_odp_mr(mr); } -#define MLX5_PF_FLAGS_PREFETCH BIT(0) #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped, - u32 flags) +static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, + u64 user_va, size_t bcnt, u32 *bytes_mapped, + u32 flags) { - int npages = 0, current_seq, page_shift, ret, np; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + int page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; + unsigned long current_seq; u64 access_mask; u64 start_idx, page_mask; - struct ib_umem_odp *odp; - size_t size; - - if (odp_mr->is_implicit_odp) { - odp = implicit_mr_get_data(mr, io_virt, bcnt); - - if (IS_ERR(odp)) - return PTR_ERR(odp); - mr = odp->private; - } else { - odp = odp_mr; - } - -next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt); page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); - start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !odp->umem.writable) { - /* prefetch with write-access must - * be supported by the MR - */ - ret = -EINVAL; - goto out; - } - if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; - current_seq = READ_ONCE(odp->notifiers_seq); - /* - * Ensure the sequence number is valid for some time before we call - * gup. - */ - smp_rmb(); - - ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, - current_seq); - - if (ret < 0) - goto out; + current_seq = mmu_interval_read_begin(&odp->notifier); - np = ret; + np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, + current_seq); + if (np < 0) + return np; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { + if (!mmu_interval_read_retry(&odp->notifier, current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -645,53 +657,127 @@ next_mr: if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(dev, "Failed to update mkey page tables\n"); + mlx5_ib_err(mr->dev, + "Failed to update mkey page tables\n"); goto out; } if (bytes_mapped) { u32 new_mappings = (np << page_shift) - - (io_virt - round_down(io_virt, 1 << page_shift)); - *bytes_mapped += min_t(u32, new_mappings, size); + (user_va - round_down(user_va, 1 << page_shift)); + + *bytes_mapped += min_t(u32, new_mappings, bcnt); } - npages += np << (page_shift - PAGE_SHIFT); - bcnt -= size; + return np << (page_shift - PAGE_SHIFT); - if (unlikely(bcnt)) { - struct ib_umem_odp *next; +out: + return ret; +} - io_virt += size; - next = odp_next(odp); - if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); - return -EAGAIN; +static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, + struct ib_umem_odp *odp_imr, u64 user_va, + size_t bcnt, u32 *bytes_mapped, u32 flags) +{ + unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long upd_start_idx = end_idx + 1; + unsigned long upd_len = 0; + unsigned long npages = 0; + int err; + int ret; + + if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt)) + return -EFAULT; + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT; + struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *mtt; + u64 len; + + mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + ret = PTR_ERR(mtt); + goto out; + } + upd_start_idx = min(upd_start_idx, idx); + upd_len = idx - upd_start_idx + 1; } - odp = next; - mr = odp->private; - goto next_mr; + + umem_odp = to_ib_umem_odp(mtt->umem); + len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) - + user_va; + + ret = pagefault_real_mr(mtt, umem_odp, user_va, len, + bytes_mapped, flags); + if (ret < 0) + goto out; + user_va += len; + bcnt -= len; + npages += ret; } - return npages; + ret = npages; + /* + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. + */ out: - if (ret == -EAGAIN) { - unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); - - if (!wait_for_completion_timeout(&odp->notifier_completion, - timeout)) { - mlx5_ib_warn( - dev, - "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", - current_seq, odp->notifiers_seq, - odp->notifiers_count); - } - } + if (likely(!upd_len)) + return ret; + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_children is updated, so a parallel page fault could + * see a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); + err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + if (err) { + mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + return err; + } return ret; } +/* + * Returns: + * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are + * not accessible, or the MR is no longer valid. + * -EAGAIN/-ENOMEM: The operation should be retried + * + * -EINVAL/others: General internal malfunction + * >0: Number of pages mapped + */ +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + + if (!odp->is_implicit_odp) { + if (unlikely(io_virt < ib_umem_start(odp) || + ib_umem_end(odp) - io_virt < bcnt)) + return -EFAULT; + return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); + } + return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -739,10 +825,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped, u32 flags) + u32 *bytes_mapped) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -751,58 +836,49 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: - mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); + if (!mmkey) { + mlx5_ib_dbg( + dev, + "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); + if (bytes_mapped) + *bytes_mapped += bcnt; + /* + * The user could specify a SGL with multiple lkeys and only + * some of them are ODP. Treat the non-ODP ones as fully + * faulted. + */ + ret = 0; + goto srcu_unlock; + } if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; } - if (prefetch && mmkey->type != MLX5_MKEY_MR) { - mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); - ret = -EINVAL; - goto srcu_unlock; - } - switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { - mlx5_ib_dbg(dev, "got dead MR\n"); - ret = -EFAULT; - goto srcu_unlock; - } - if (prefetch) { - if (!is_odp_mr(mr) || - mr->ibmr.pd != pd) { - mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n", - is_odp_mr(mr) ? "MR is not ODP" : - "PD is not of the MR"); - ret = -EINVAL; - goto srcu_unlock; - } - } - - if (!is_odp_mr(mr)) { - mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); - if (bytes_mapped) - *bytes_mapped += bcnt; - ret = 0; - goto srcu_unlock; - } - - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); + ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. + */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; @@ -892,7 +968,7 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -973,7 +1049,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped, 0); + bytes_mapped); if (ret < 0) break; npages += ret; @@ -1256,8 +1332,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, NULL, rkey, address, length, - &pfault->bytes_committed, NULL, - 0); + &pfault->bytes_committed, NULL); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1284,8 +1359,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, rkey, address, prefetch_len, - &bytes_committed, NULL, - 0); + &bytes_committed, NULL); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1545,7 +1619,6 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, - .invalidate_range = mlx5_ib_invalidate_range, }; int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) @@ -1588,114 +1661,128 @@ int mlx5_ib_odp_init(void) struct prefetch_mr_work { struct work_struct work; - struct ib_pd *pd; u32 pf_flags; u32 num_sge; - struct ib_sge sg_list[0]; + struct { + u64 io_virt; + struct mlx5_ib_mr *mr; + size_t length; + } frags[]; }; -static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, - struct ib_sge *sg_list, u32 num_sge, - u32 from) +static void destroy_prefetch_work(struct prefetch_mr_work *work) { u32 i; - int srcu_key; - - srcu_key = srcu_read_lock(&dev->mr_srcu); - - for (i = from; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - atomic_dec(&mr->num_pending_prefetch); - } - - srcu_read_unlock(&dev->mr_srcu, srcu_key); + for (i = 0; i < work->num_sge; ++i) + atomic_dec(&work->frags[i].mr->num_deferred_work); + kvfree(work); } -static bool num_pending_prefetch_inc(struct ib_pd *pd, - struct ib_sge *sg_list, u32 num_sge) +static struct mlx5_ib_mr * +get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ret = true; - u32 i; + struct mlx5_core_mkey *mmkey; + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; - for (i = 0; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; + lockdep_assert_held(&dev->odp_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - if (!mmkey || mmkey->key != sg_list[i].lkey) { - ret = false; - break; - } + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); + if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + return NULL; - if (mmkey->type != MLX5_MKEY_MR) { - ret = false; - break; - } + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + if (mr->ibmr.pd != pd) + return NULL; - if (mr->ibmr.pd != pd) { - ret = false; - break; - } + odp = to_ib_umem_odp(mr->umem); - if (!mr->live) { - ret = false; - break; - } + /* prefetch with write-access must be supported by the MR */ + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + !odp->umem.writable) + return NULL; - atomic_inc(&mr->num_pending_prefetch); - } + return mr; +} - if (!ret) - num_pending_prefetch_dec(dev, sg_list, i, 0); +static void mlx5_ib_prefetch_mr_work(struct work_struct *w) +{ + struct prefetch_mr_work *work = + container_of(w, struct prefetch_mr_work, work); + u32 bytes_mapped = 0; + u32 i; - return ret; + for (i = 0; i < work->num_sge; ++i) + pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + + destroy_prefetch_work(work); } -static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, - struct ib_sge *sg_list, u32 num_sge) +static bool init_prefetch_work(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct prefetch_mr_work *work, + struct ib_sge *sg_list, u32 num_sge) { u32 i; - int ret = 0; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - struct ib_sge *sg = &sg_list[i]; - int bytes_committed = 0; + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = + get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!work->frags[i].mr) { + work->num_sge = i - 1; + if (i) + destroy_prefetch_work(work); + return false; + } - ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr, - sg->length, - &bytes_committed, NULL, - pf_flags); - if (ret < 0) - break; + /* Keep the MR pointer will valid outside the SRCU */ + atomic_inc(&work->frags[i].mr->num_deferred_work); } - - return ret < 0 ? ret : 0; + work->num_sge = num_sge; + return true; } -static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct ib_sge *sg_list, + u32 num_sge) { - struct prefetch_mr_work *w = - container_of(work, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 bytes_mapped = 0; + int srcu_key; + int ret = 0; + u32 i; + + srcu_key = srcu_read_lock(&dev->odp_srcu); + for (i = 0; i < num_sge; ++i) { + struct mlx5_ib_mr *mr; - if (ib_device_try_get(w->pd->device)) { - mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list, - w->num_sge); - ib_device_put(w->pd->device); + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!mr) { + ret = -ENOENT; + goto out; + } + ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, + &bytes_mapped, pf_flags); + if (ret < 0) + goto out; } + ret = 0; - num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, - w->num_sge, 0); - kvfree(w); +out: + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return ret; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1703,43 +1790,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, u32 flags, struct ib_sge *sg_list, u32 num_sge) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + u32 pf_flags = 0; struct prefetch_mr_work *work; - bool valid_req; int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) - return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list, + return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; - memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - - /* It is guaranteed that the pd when work is executed is the pd when - * work was queued since pd can't be destroyed while it holds MRs and - * destroying a MR leads to flushing the workquque - */ - work->pd = pd; - work->pf_flags = pf_flags; - work->num_sge = num_sge; - - INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - - srcu_key = srcu_read_lock(&dev->mr_srcu); - - valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); - if (valid_req) - queue_work(system_unbound_wq, &work->work); - else - kvfree(work); - - srcu_read_unlock(&dev->mr_srcu, srcu_key); - - return valid_req ? 0 : -EINVAL; + srcu_key = srcu_read_lock(&dev->odp_srcu); + if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return -EINVAL; + } + queue_work(system_unbound_wq, &work->work); + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..7e51870e9e01 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); @@ -1041,11 +1041,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1 | + MLX5_IB_QP_CREATE_WC_TEST)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; + else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -1104,7 +1107,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } @@ -2140,7 +2143,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } @@ -3249,10 +3252,12 @@ static int modify_raw_packet_qp_sq( } /* Only remove the old rate after new rate was set */ - if ((old_rl.rate && - !mlx5_rl_are_equal(&old_rl, &new_rl)) || - (new_state != MLX5_SQC_STATE_RDY)) + if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) || + (new_state != MLX5_SQC_STATE_RDY)) { mlx5_rl_remove_rate(dev, &old_rl); + if (new_state != MLX5_SQC_STATE_RDY) + memset(&new_rl, 0, sizeof(new_rl)); + } ibqp->rl = new_rl; sq->state = new_state; @@ -5328,7 +5333,6 @@ out: * we hit doorbell */ wmb(); - /* currently we support only regular doorbells */ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); /* Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. @@ -5823,7 +5827,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; @@ -5955,12 +5959,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + /* + * In Firmware number of strides in each WQE is: + * "512 * 2^single_wqe_log_num_of_strides" + * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are + * accepted as 0 to 9 + */ + static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9 }; MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); MLX5_SET(wq, wq, log_wqe_stride_size, rwq->single_stride_log_num_of_bytes - MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); - MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + fw_map[rwq->log_num_strides - + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]); } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); @@ -6035,6 +6048,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, return 0; } +static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides) +{ + if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) && + (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + return true; +} + static int prepare_user_rq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata, @@ -6082,14 +6108,16 @@ static int prepare_user_rq(struct ib_pd *pd, MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); return -EINVAL; } - if ((ucmd.single_wqe_log_num_of_strides > - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || - (ucmd.single_wqe_log_num_of_strides < - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { - mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", - ucmd.single_wqe_log_num_of_strides, - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + if (!log_of_strides_valid(dev, + ucmd.single_wqe_log_num_of_strides)) { + mlx5_ib_dbg( + dev, + "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ? + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES : + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); return -EINVAL; } rwq->single_stride_log_num_of_bytes = diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..8f6c04f12531 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include <uapi/rdma/rdma_netlink.h> +#include <rdma/ib_umem_odp.h> +#include <rdma/restrack.h> +#include "mlx5_ib.h" + +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..62939df3c692 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index bfd4eebc1182..599794c5a78f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7ad517da4917..99aa8183a7f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -196,30 +196,19 @@ static void forward_trap(struct mthca_dev *dev, } } -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && - slid == 0) { - forward_trap(to_mdev(ibdev), port_num, in_mad); + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) { + forward_trap(to_mdev(ibdev), port_num, in); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -229,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev, * Only handle PMA and Mellanox vendor-specific class gets and * sets for other classes. */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET && + in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries or vendor-specific * MADs -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK)) return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; - if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && - in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in->mad_hdr.method == IB_MGMT_METHOD_SET && + in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = ib_lid_cpu16(pattr.lid); - err = mthca_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; else if (err) { @@ -270,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); - node_desc_override(ibdev, out_mad); + if (!out->mad_hdr.status) { + smp_snoop(ibdev, port_num, in, prev_lid); + node_desc_override(ibdev, out); } /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23554d8bf241..33002530fee7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, acc, - ucmd.mr_attrs & MTHCA_MR_DMASYNC); - + mr->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 8d3e36d548aa..2b7f00ac41b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -247,35 +247,20 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_process_mad(struct ib_device *ibdev, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { - int status; + int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_PERF_MGMT: + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); - if (!ocrdma_pma_counters(dev, out_mad)) - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - else - status = IB_MAD_RESULT_SUCCESS; - break; - default: - status = IB_MAD_RESULT_SUCCESS; - break; + ocrdma_pma_counters(dev, out); + status |= IB_MAD_RESULT_REPLY; } + return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 64cb82c08664..9780afcde780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_process_mad(struct ib_device *, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c15cfc6cef81..d8c47d24d6d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_port_immutable = ocrdma_port_immutable, .map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, - .modify_port = ocrdma_modify_port, .modify_qp = ocrdma_modify_qp, .poll_cq = ocrdma_poll_cq, .post_recv = ocrdma_post_recv, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6ef89c226ad8..c2e0d0fa44be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats { }; struct ocrdma_rx_qp_err_stats { - u32 nak_invalid_requst_errors; + u32 nak_invalid_request_errors; u32 nak_remote_operation_errors; u32 nak_count_remote_access_errors; u32 local_length_errors; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index a902942adb5d..5f831e3bdbad 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); pcur = stats; - pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", - (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors", + (u64)rx_qp_err_stats->nak_invalid_request_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", (u64)rx_qp_err_stats->nak_remote_operation_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", @@ -670,12 +670,10 @@ err: return -EFAULT; } -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad) +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad) { struct ib_pma_portcounters *pma_cnt; - memset(out_mad->data, 0, sizeof out_mad->data); pma_cnt = (void *)(out_mad->data + 40); ocrdma_update_stats(dev); @@ -683,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev, pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev)); pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev)); pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev)); - return 0; } static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index bba1fec4f11f..98feca26ac55 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev); void ocrdma_release_stats_resources(struct ocrdma_dev *dev); void ocrdma_rem_port_stats(struct ocrdma_dev *dev); void ocrdma_add_port_stats(struct ocrdma_dev *dev); -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad); +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad); #endif /* __OCRDMA_STATS_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e8267e590772..9bc1ca6f6f9e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev, return 0; } -int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, unsigned long len) { @@ -875,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 32488da1b752..3a5010881be5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int ocrdma_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..dcdc85a1ab25 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } @@ -212,7 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = { .get_link_layer = qedr_link_layer, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, - .modify_port = qedr_modify_port, + .mmap_free = qedr_mmap_free, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, .poll_cq = qedr_poll_cq, @@ -357,9 +357,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0cfd849b13d6..5488dbd59d3c 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include <linux/qed/qed_rdma_if.h> #include <linux/qed/qede_rdma.h> #include <linux/qed/roce_common.h> +#include <linux/completion.h> #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -230,14 +231,16 @@ struct qedr_ucontext { struct qedr_dev *dev; struct qedr_pd *pd; void __iomem *dpi_addr; + struct rdma_user_mmap_entry *db_mmap_entry; u64 dpi_phys_addr; u32 dpi_size; u16 dpi; + bool db_rec; +}; - struct list_head mm_head; - - /* Lock to protect mm list */ - struct mutex mm_list_lock; +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; }; union db_prod64 { @@ -265,6 +268,13 @@ struct qedr_userq { struct qedr_pbl *pbl_tbl; u64 buf_addr; size_t buf_len; + + /* doorbell recovery */ + void __iomem *db_addr; + struct qedr_user_db_rec *db_rec_data; + struct rdma_user_mmap_entry *db_mmap_entry; + void __iomem *db_rec_db2_addr; + union db_prod32 db_rec_db2_data; }; struct qedr_cq { @@ -300,19 +310,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -struct qedr_mm { - struct { - u64 phy_addr; - unsigned long len; - } key; - struct list_head entry; -}; - -union db_prod32 { - struct rdma_pwm_val16_data data; - u32 raw; -}; - struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; @@ -377,10 +374,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -395,6 +402,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -437,8 +445,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -476,6 +487,18 @@ struct qedr_mr { u32 npages; }; +struct qedr_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + struct qedr_dev *dev; + union { + u64 io_address; + void *address; + }; + size_t length; + u16 dpi; + u8 mmap_flag; +}; + #define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT))) #define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ @@ -531,7 +554,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline @@ -574,4 +597,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct qedr_srq, ibsrq); } + +static inline struct qedr_user_mmap_entry * +get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct qedr_user_mmap_entry, + rdma_entry); +} #endif diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..792eecd206b6 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. + * the ep's refcnt was increased before calling this + * function, therefore it is safe to access qp + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work queue we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -451,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev, if ((!dst) || dst->error) { if (dst) { - dst_release(dst); DP_ERR(dev, "ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return -EINVAL; } @@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 6f3ce86019b7..4cd292966aa9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -58,6 +59,11 @@ #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) +enum { + QEDR_USER_MMAP_IO_WC = 0, + QEDR_USER_MMAP_PHYS_PAGE, +}; + static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { @@ -250,78 +256,31 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) return 0; } -int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - -static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - struct qedr_mm *mm; - - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) - return -ENOMEM; - - mm->key.phy_addr = phy_addr; - /* This function might be called with a length which is not a multiple - * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel - * forces this granularity by increasing the requested size if needed. - * When qedr_mmap is called, it will search the list with the updated - * length as a key. To prevent search failures, the length is rounded up - * in advance to PAGE_SIZE. - */ - mm->key.len = roundup(len, PAGE_SIZE); - INIT_LIST_HEAD(&mm->entry); - - mutex_lock(&uctx->mm_list_lock); - list_add(&mm->entry, &uctx->mm_head); - mutex_unlock(&uctx->mm_list_lock); - - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", - (unsigned long long)mm->key.phy_addr, - (unsigned long)mm->key.len, uctx); - - return 0; -} - -static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - bool found = false; - struct qedr_mm *mm; - - mutex_lock(&uctx->mm_list_lock); - list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) - continue; - - found = true; - break; - } - mutex_unlock(&uctx->mm_list_lock); - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", - mm->key.phy_addr, mm->key.len, uctx, found); - - return found; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; int rc; struct qedr_ucontext *ctx = get_qedr_ucontext(uctx); struct qedr_alloc_ucontext_resp uresp = {}; + struct qedr_alloc_ucontext_req ureq = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; + struct qedr_user_mmap_entry *entry; if (!udata) return -EFAULT; + if (udata->inlen) { + rc = ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return -EFAULT; + } + + ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); + } + rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { DP_ERR(dev, @@ -334,13 +293,29 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) ctx->dpi_addr = oparams.dpi_addr; ctx->dpi_phys_addr = oparams.dpi_phys_addr; ctx->dpi_size = oparams.dpi_size; - INIT_LIST_HEAD(&ctx->mm_head); - mutex_init(&ctx->mm_list_lock); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + rc = -ENOMEM; + goto err; + } + + entry->io_address = ctx->dpi_phys_addr; + entry->length = ctx->dpi_size; + entry->mmap_flag = QEDR_USER_MMAP_IO_WC; + entry->dpi = ctx->dpi; + entry->dev = dev; + rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, + ctx->dpi_size); + if (rc) { + kfree(entry); + goto err; + } + ctx->db_mmap_entry = &entry->rdma_entry; uresp.dpm_enabled = dev->user_dpm_enabled; uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; - uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry); uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; uresp.max_recv_wr = dev->attr.max_rqe; @@ -352,82 +327,92 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) - return rc; + goto err; ctx->dev = dev; - rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); - if (rc) - return rc; - DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", &ctx->ibucontext); return 0; + +err: + if (!ctx->db_mmap_entry) + dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi); + else + rdma_user_mmap_entry_remove(ctx->db_mmap_entry); + + return rc; } void qedr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); - struct qedr_mm *mm, *tmp; DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", uctx); - uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); - list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", - mm->key.phy_addr, mm->key.len, uctx); - list_del(&mm->entry); - kfree(mm); - } + rdma_user_mmap_entry_remove(uctx->db_mmap_entry); } -int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct qedr_ucontext *ucontext = get_qedr_ucontext(context); - struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; - unsigned long len = (vma->vm_end - vma->vm_start); - unsigned long dpi_start; + struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); + struct qedr_dev *dev = entry->dev; - dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); + if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE) + free_page((unsigned long)entry->address); + else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); - DP_DEBUG(dev, QEDR_MSG_INIT, - "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", - (void *)vma->vm_start, (void *)vma->vm_end, - (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + kfree(entry); +} - if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { - DP_ERR(dev, - "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", - (void *)vma->vm_start, (void *)vma->vm_end); - return -EINVAL; - } +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) +{ + struct ib_device *dev = ucontext->device; + size_t length = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct qedr_user_mmap_entry *entry; + int rc = 0; + u64 pfn; - if (!qedr_search_mmap(ucontext, phys_addr, len)) { - DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", - vma->vm_pgoff); - return -EINVAL; - } + ibdev_dbg(dev, + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - if (phys_addr < dpi_start || - ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { - DP_ERR(dev, - "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", - (void *)phys_addr, (void *)dpi_start, - ucontext->dpi_size); + rdma_entry = rdma_user_mmap_entry_get(ucontext, vma); + if (!rdma_entry) { + ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } - - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); - return -EINVAL; + entry = get_qedr_mmap_entry(rdma_entry); + ibdev_dbg(dev, + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->io_address, length, entry->mmap_flag); + + switch (entry->mmap_flag) { + case QEDR_USER_MMAP_IO_WC: + pfn = entry->io_address >> PAGE_SHIFT; + rc = rdma_user_mmap_io(ucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); + break; + case QEDR_USER_MMAP_PHYS_PAGE: + rc = vm_insert_page(vma, vma->vm_start, + virt_to_page(entry->address)); + break; + default: + rc = -EINVAL; } - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, - vma->vm_page_prot); + if (rc) + ibdev_dbg(dev, + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->io_address, length, entry->mmap_flag, rc); + + rdma_user_mmap_entry_put(rdma_entry); + return rc; } int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -657,16 +642,50 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, } } +static int qedr_db_recovery_add(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return 0; + } + + return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data, + db_width, db_space); +} + +static void qedr_db_recovery_del(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return; + } + + /* Ignore return code as there is not much we can do about it. Error + * log will be printed inside. + */ + dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data); +} + static int qedr_copy_cq_uresp(struct qedr_dev *dev, - struct qedr_cq *cq, struct ib_udata *udata) + struct qedr_cq *cq, struct ib_udata *udata, + u32 db_offset) { struct qedr_create_cq_uresp uresp; int rc; memset(&uresp, 0, sizeof(uresp)); - uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.db_offset = db_offset; uresp.icid = cq->icid; + if (cq->q.db_mmap_entry) + uresp.db_rec_addr = + rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -694,10 +713,58 @@ static inline int qedr_align_cq_entries(int entries) return aligned_size / QEDR_CQE_SIZE; } +static int qedr_init_user_db_rec(struct ib_udata *udata, + struct qedr_dev *dev, struct qedr_userq *q, + bool requires_db_rec) +{ + struct qedr_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + struct qedr_user_mmap_entry *entry; + int rc; + + /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */ + if (requires_db_rec == 0 || !uctx->db_rec) + return 0; + + /* Allocate a page for doorbell recovery, add to mmap */ + q->db_rec_data = (void *)get_zeroed_page(GFP_USER); + if (!q->db_rec_data) { + DP_ERR(dev, "get_zeroed_page failed\n"); + return -ENOMEM; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto err_free_db_data; + + entry->address = q->db_rec_data; + entry->length = PAGE_SIZE; + entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE; + rc = rdma_user_mmap_entry_insert(&uctx->ibucontext, + &entry->rdma_entry, + PAGE_SIZE); + if (rc) + goto err_free_entry; + + q->db_mmap_entry = &entry->rdma_entry; + + return 0; + +err_free_entry: + kfree(entry); + +err_free_db_data: + free_page((unsigned long)q->db_rec_data); + q->db_rec_data = NULL; + return -ENOMEM; +} + static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, - size_t buf_len, int access, int dmasync, + size_t buf_len, bool requires_db_rec, + int access, int alloc_and_init) { u32 fw_pages; @@ -705,7 +772,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); + q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -735,7 +802,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, } } - return 0; + /* mmap the user address used to store doorbell data for recovery */ + return qedr_init_user_db_rec(udata, dev, q, requires_db_rec); err0: ib_umem_release(q->umem); @@ -821,6 +889,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int entries = attr->cqe; struct qedr_cq *cq = get_qedr_cq(ibcq); int chain_entries; + u32 db_offset; int page_cnt; u64 pbl_ptr; u16 icid; @@ -840,8 +909,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, chain_entries = qedr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); + /* calc db offset. user will add DPI base, kernel will add db addr */ + db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create cq: problem copying data from user space\n"); goto err0; @@ -856,7 +929,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, 1, + ureq.len, true, IB_ACCESS_LOCAL_WRITE, 1); if (rc) goto err0; @@ -865,6 +938,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, page_cnt = cq->q.pbl_info.num_pbes; cq->ibcq.cqe = chain_entries; + cq->q.db_addr = ctx->dpi_addr + db_offset; } else { cq->cq_type = QEDR_CQ_TYPE_KERNEL; @@ -876,7 +950,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, sizeof(union rdma_cqe), &cq->pbl, NULL); if (rc) - goto err1; + goto err0; page_cnt = qed_chain_get_page_cnt(&cq->pbl); pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); @@ -888,21 +962,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); if (rc) - goto err2; + goto err1; cq->icid = icid; cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); if (udata) { - rc = qedr_copy_cq_uresp(dev, cq, udata); + rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset); + if (rc) + goto err2; + + rc = qedr_db_recovery_add(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data, + DB_REC_WIDTH_64B, + DB_REC_USER); if (rc) - goto err3; + goto err2; + } else { /* Generate doorbell address. */ - cq->db_addr = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); cq->db.data.icid = cq->icid; + cq->db_addr = dev->db_addr + db_offset; cq->db.data.params = DB_AGG_CMD_SET << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; @@ -912,6 +993,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->latest_cqe = NULL; consume_cqe(cq); cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + + rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data, + DB_REC_WIDTH_64B, DB_REC_KERNEL); + if (rc) + goto err2; } DP_DEBUG(dev, QEDR_MSG_CQ, @@ -920,18 +1006,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; -err3: +err2: destroy_iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); -err2: - if (udata) - qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); - else - dev->ops->common->chain_free(dev->cdev, &cq->pbl); err1: - if (udata) + if (udata) { + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + if (cq->q.db_mmap_entry) + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } else { + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } err0: return -EINVAL; } @@ -962,8 +1049,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->destroyed = 1; /* GSIs CQs are handled by driver, so they don't exist in the FW */ - if (cq->cq_type == QEDR_CQ_TYPE_GSI) + if (cq->cq_type == QEDR_CQ_TYPE_GSI) { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); return; + } iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); @@ -972,6 +1061,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + + if (cq->q.db_rec_data) { + qedr_db_recovery_del(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data); + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } + } else { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); } /* We don't want the IRQ handler to handle a non-existing CQ so we @@ -1136,8 +1233,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1150,6 +1247,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; + if (qp->urq.db_mmap_entry) + uresp->rq_db_rec_addr = + rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1163,22 +1263,26 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, uresp->sq_icid = qp->icid; else uresp->sq_icid = qp->icid + 1; + + if (qp->usq.db_mmap_entry) + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, - struct qedr_qp *qp, struct ib_udata *udata) + struct qedr_qp *qp, struct ib_udata *udata, + struct qedr_create_qp_uresp *uresp) { - struct qedr_create_qp_uresp uresp; int rc; - memset(&uresp, 0, sizeof(uresp)); - qedr_copy_sq_uresp(dev, &uresp, qp); - qedr_copy_rq_uresp(dev, &uresp, qp); + memset(uresp, 0, sizeof(*uresp)); + qedr_copy_sq_uresp(dev, uresp, qp); + qedr_copy_rq_uresp(dev, uresp, qp); - uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; - uresp.qp_id = qp->qp_id; + uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp->qp_id = qp->qp_id; - rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp)); if (rc) DP_ERR(dev, "create qp: failed a copy to user space with qp icid=0x%x.\n", @@ -1193,7 +1297,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1222,16 +1329,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->sq.max_sges, qp->sq_cq->icid); } -static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + if (!qp->srq) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + qedr_db_recovery_del(dev, qp->sq.db, + &qp->sq.db_data); } + + return rc; } static int qedr_check_srq_params(struct qedr_dev *dev, @@ -1279,19 +1405,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, - int access, int dmasync) + int access) { struct scatterlist *sg; int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, access, dmasync, 1); + ureq->srq_len, false, access, 1); if (rc) return rc; srq->prod_umem = ib_umem_get(udata, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), access, dmasync); + sizeof(struct rdma_srq_producers), access); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1381,13 +1507,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, hw_srq->max_sges = init_attr->attr.max_sge; if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create srq: problem copying data from user space\n"); goto err0; } - rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0); if (rc) goto err0; @@ -1570,13 +1697,39 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, &qp->urq.pbl_info, FW_PAGE_SHIFT); } -static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) +static void qedr_cleanup_user(struct qedr_dev *dev, + struct qedr_ucontext *ctx, + struct qedr_qp *qp) { ib_umem_release(qp->usq.umem); qp->usq.umem = NULL; ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } + + if (qp->usq.db_rec_data) { + qedr_db_recovery_del(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry); + } + + if (qp->urq.db_rec_data) { + qedr_db_recovery_del(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); + } + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data); } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1588,27 +1741,30 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_qp_uresp uresp; + struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); if (rc) { DP_ERR(dev, "Problem copying data from user space\n"); return rc; } - /* SQ - read access only (0), dma sync not required (0) */ + /* SQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, 0, 0, alloc_and_init); + ureq.sq_len, true, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { - /* RQ - read access only (0), dma sync not required (0) */ + /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); + ureq.rq_len, true, 0, alloc_and_init); if (rc) return rc; } @@ -1638,29 +1794,76 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata); + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + + /* db offset was calculated in copy_qp_uresp, now set in the user q */ + ctx = pd->uctx; + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; + + /* calculate the db_rec_db2 data since it is constant so no + * need to reflect from user + */ + qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); + qp->urq.db_rec_db2_data.data.value = + cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + } + + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); if (rc) goto err; + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } qedr_qp_user_print(dev, qp); - return 0; + return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); if (rc) DP_ERR(dev, "create qp: fatal fault. rc=%d", rc); err1: - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); return rc; } -static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; @@ -1668,6 +1871,19 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); qp->rq.iwarp_db2_data.data.icid = qp->icid; qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + + rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; } static int @@ -1715,8 +1931,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_roce_db_info(dev, qp); - return rc; + return qedr_set_roce_db_info(dev, qp); } static int @@ -1774,8 +1989,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_iwarp_db_info(dev, qp); - return rc; + return qedr_set_iwarp_db_info(dev, qp); err: dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -1790,6 +2004,20 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); kfree(qp->rqe_wr_id); + + /* GSI qp is not registered to db mechanism so no need to delete */ + if (qp->qp_type == IB_QPT_GSI) + return; + + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); + + if (!qp->srq) { + qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data); + } } static int qedr_create_kernel_qp(struct qedr_dev *dev, @@ -1805,6 +2033,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -1918,7 +2147,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2429,7 +2658,10 @@ err: static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, struct ib_udata *udata) { - int rc = 0; + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; if (qp->qp_type != IB_QPT_GSI) { rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2437,8 +2669,8 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) - qedr_cleanup_user(dev, qp); + if (qp->create_type == QEDR_QP_CREATE_USER) + qedr_cleanup_user(dev, ctx, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2699,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } @@ -2597,7 +2839,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; @@ -2673,8 +2915,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ ib_umem_release(mr->umem); @@ -4106,19 +4348,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *mad_hdr, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - struct qedr_dev *dev = get_qedr_dev(ibdev); - - DP_DEBUG(dev, QEDR_MSG_GSI, - "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", - mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, - mad_hdr->class_specific, mad_hdr->class_version, - mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 9aaa90283d6e..18027844eb87 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -35,8 +35,6 @@ int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata); int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int qedr_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); @@ -46,7 +44,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); -int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); @@ -93,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index); + const struct ib_grh *in_grh, const struct ib_mad *in_mad, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 531d8a1db2c3..ca5ea734e3d0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) * * The exact combo of LEDs if on is true is determined by looking * at the ibcstatus. - * These LEDs indicate the physical and logical state of IB link. * For this chip (at least with recommended board pinouts), LED1 * is Yellow (logical state) and LED2 is Green (physical state), diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f92faf5ec369..79bb83222e8d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp, struct ib_cc_classportinfo_attr *p = (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->base_version = 1; p->class_version = 1; p->cap_mask = 0; @@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; @@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp, struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct ib_cc_congestion_entry_shadow *entries; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); entries = ppd->congestion_entries_shadow->entries; @@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) goto bail; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); max_cct_block = @@ -2296,18 +2288,11 @@ bail: return reply_failure((struct ib_smp *) ccp); } -static int check_cc_key(struct qib_ibport *ibp, - struct ib_cc_mad *ccp, int mad_flags) -{ - return 0; -} - static int process_cc(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - struct qib_ibport *ibp = to_iport(ibdev, port); int ret; *out_mad = *in_mad; @@ -2318,10 +2303,6 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, goto bail; } - ret = check_cc_key(ibp, ccp, mad_flags); - if (ret) - goto bail; - switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { @@ -2405,28 +2386,21 @@ bail: */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { + switch (in->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_subn(ibdev, mad_flags, port, in, out); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port, in_mad, out_mad); + ret = process_perf(ibdev, port, in, out); goto bail; case IB_MGMT_CLASS_CONG_MGMT: @@ -2435,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = IB_MAD_RESULT_SUCCESS; goto bail; } - ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_cc(ibdev, mad_flags, port, in, out); goto bail; default: diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3926be78036e..568b21eb6ea1 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 17bdf8acee2f..8bf414b47b96 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 7800e6930502..a26a4fd86bf4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); goto err_cq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 8f9749d54688..86a6c054ea26 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -58,7 +58,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define PVRDMA_ROCEV2_VERSION 18 #define PVRDMA_PPN64_VERSION 19 -#define PVRDMA_VERSION PVRDMA_PPN64_VERSION +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp { u32 max_inline_data; }; +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + struct pvrdma_cmd_modify_qp { struct pvrdma_cmd_hdr hdr; u32 qp_handle; @@ -663,6 +675,7 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_cq_resp create_cq_resp; struct pvrdma_cmd_resize_cq_resp resize_cq_resp; struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; struct pvrdma_cmd_create_srq_resp create_srq_resp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index f3a3d22ee8d7..c61e665ff261 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(udata, start, length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index bca6a58a442e..f15809c28f67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -52,6 +52,9 @@ #include "pvrdma.h" +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp); + static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, struct pvrdma_cq **recv_cq) { @@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_qp *cmd = &req.create_qp; struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; struct pvrdma_create_qp ucmd; + struct pvrdma_create_qp_resp qp_resp = {}; unsigned long flags; int ret; bool is_srq = !!init_attr->srq; @@ -260,10 +265,19 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } + /* Userspace supports qpn and qp handles? */ + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && + udata->outlen < sizeof(qp_resp)) { + dev_warn(&dev->pdev->dev, + "create queuepair not supported\n"); + ret = -EOPNOTSUPP; + goto err_qp; + } + if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); + ucmd.rbuf_size, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); goto err_qp; @@ -275,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, - ucmd.sbuf_size, 0, 0); + ucmd.sbuf_size, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) ib_umem_release(qp->rumem); @@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ - qp->qp_handle = resp->qpn; qp->port = init_attr->port_num; - qp->ibqp.qp_num = resp->qpn; + + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { + qp->ibqp.qp_num = resp_v2->qpn; + qp->qp_handle = resp_v2->qp_handle; + } else { + qp->ibqp.qp_num = resp->qpn; + qp->qp_handle = resp->qpn; + } + spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + if (udata) { + qp_resp.qpn = qp->ibqp.qp_num; + qp_resp.qp_handle = qp->qp_handle; + + if (ib_copy_to_udata(udata, &qp_resp, + min(udata->outlen, sizeof(qp_resp)))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + __pvrdma_destroy_qp(dev, qp); + return ERR_PTR(-EINVAL); + } + } + return &qp->ibqp; err_pdir: @@ -400,27 +434,15 @@ err_qp: return ERR_PTR(ret); } -static void pvrdma_free_qp(struct pvrdma_qp *qp) +static void _pvrdma_free_qp(struct pvrdma_qp *qp) { + unsigned long flags; struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); - struct pvrdma_cq *scq; - struct pvrdma_cq *rcq; - unsigned long flags, scq_flags, rcq_flags; - - /* In case cq is polling */ - get_cqs(qp, &scq, &rcq); - pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); - - _pvrdma_flush_cqe(qp, scq); - if (scq != rcq) - _pvrdma_flush_cqe(qp, rcq); spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle] = NULL; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - if (refcount_dec_and_test(&qp->refcnt)) complete(&qp->free); wait_for_completion(&qp->free); @@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&dev->num_qps); } -/** - * pvrdma_destroy_qp - destroy a queue pair - * @qp: the queue pair to destroy - * @udata: user data or null for kernel object - * - * @return: 0 on success. - */ -int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + /* + * We're now unlocking the CQs before clearing out the qp handle this + * should still be safe. We have destroyed the backend QP and flushed + * the CQEs so there should be no other completions for this QP. + */ + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_free_qp(qp); +} + +static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, + u32 qp_handle) { - struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; int ret; memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; - cmd->qp_handle = vqp->qp_handle; + cmd->qp_handle = qp_handle; - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) - dev_warn(&to_vdev(qp->device)->pdev->dev, + dev_warn(&dev->pdev->dev, "destroy queuepair failed, error: %d\n", ret); +} +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * @udata: user data or null for kernel object + * + * @return: always 0. + */ +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); pvrdma_free_qp(vqp); return 0; } +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp) +{ + _pvrdma_destroy_qp_work(dev, qp->qp_handle); + _pvrdma_free_qp(qp); +} + /** * pvrdma_modify_qp - modify queue pair attributes * @ibqp: the queue pair diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..98c8be71d91d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fe99da0ff060..ee02c6176007 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * @udata: user data or NULL for kernel object * * Return: 0 on success */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a85571a4cf57..13d7f66eadab 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -552,7 +552,6 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /** * rvt_driver_cq_init - Init cq resources on behalf of driver - * @rdi: rvt dev structure * * Return: 0 on success */ @@ -568,7 +567,6 @@ int rvt_driver_cq_init(void) /** * rvt_cq_exit - tear down cq reources - * @rdi: rvt dev structure */ void rvt_cq_exit(void) { diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index a6a39f01dca3..b9a76bf74857 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -390,7 +390,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) return (void *)umem; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0b0a241c57ff..3cdf75d0c7a4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2563,10 +2563,9 @@ void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift) EXPORT_SYMBOL(rvt_add_retry_timer_ext); /** - * rvt_add_rnr_timer - add/start an rnr timer - * @qp - the QP - * @aeth - aeth of RNR timeout, simulated aeth for loopback - * add an rnr timer on the QP + * rvt_add_rnr_timer - add/start an rnr timer on the QP + * @qp: the QP + * @aeth: aeth of RNR timeout, simulated aeth for loopback */ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) { @@ -2583,7 +2582,7 @@ EXPORT_SYMBOL(rvt_add_rnr_timer); /** * rvt_stop_rc_timers - stop all timers - * @qp - the QP + * @qp: the QP * stop any pending timers */ void rvt_stop_rc_timers(struct rvt_qp *qp) @@ -2617,7 +2616,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) /** * rvt_del_timers_sync - wait for any timeout routines to exit - * @qp - the QP + * @qp: the QP */ void rvt_del_timers_sync(struct rvt_qp *qp) { @@ -2626,7 +2625,7 @@ void rvt_del_timers_sync(struct rvt_qp *qp) } EXPORT_SYMBOL(rvt_del_timers_sync); -/** +/* * This is called from s_timer for missing responses. */ static void rvt_rc_timeout(struct timer_list *t) @@ -2676,12 +2675,13 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry); * rvt_qp_iter_init - initial for QP iteration * @rdi: rvt devinfo * @v: u64 value + * @cb: user-defined callback * * This returns an iterator suitable for iterating QPs * in the system. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * @cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL(rvt_qp_iter_init); /** * rvt_qp_iter_next - return the next QP in iter - * @iter - the iterator + * @iter: the iterator * * Fine grained QP iterator suitable for use * with debugfs seq_file mechanisms. @@ -2775,14 +2775,14 @@ EXPORT_SYMBOL(rvt_qp_iter_next); /** * rvt_qp_iter - iterate all QPs - * @rdi - rvt devinfo - * @v - a 64 bit value - * @cb - a callback + * @rdi: rvt devinfo + * @v: a 64-bit value + * @cb: a callback * * This provides a way for iterating all QPs. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18da1e1ea979..986265ad6e79 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -683,9 +683,10 @@ EXPORT_SYMBOL(rvt_unregister_device); /** * rvt_init_port - init internal data for driver port - * @rdi: rvt dev strut + * @rdi: rvt_dev_info struct * @port: rvt port * @port_index: 0 based index of ports, different from IB core port num + * @pkey_table: pkey_table for @port * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index a8c11b5e1e94..0946a301a5c5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -77,12 +77,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; - rxe->attr.fw_ver = RXE_FW_VER; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; - rxe->attr.vendor_id = RXE_VENDOR_ID; - rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; - rxe->attr.hw_ver = RXE_HW_VER; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; @@ -94,22 +90,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; - rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; - rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; - rxe->attr.max_ee = RXE_MAX_EE; - rxe->attr.max_rdd = RXE_MAX_RDD; - rxe->attr.max_mw = RXE_MAX_MW; - rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; - rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; rxe->attr.max_ah = RXE_MAX_AH; - rxe->attr.max_fmr = RXE_MAX_FMR; - rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ea6a819b7167..35a2baf2f364 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -169,7 +169,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, void *vaddr; int err; - umem = ib_umem_get(udata, start, length, access, 0); + umem = ib_umem_get(udata, start, length, access); if (IS_ERR(umem)) { pr_warn("err %d from rxe_umem_get\n", (int)PTR_ERR(umem)); diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index fe5207386700..353c6668249e 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -60,12 +60,8 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) /* default/initial rxe device parameter settings */ enum rxe_device_param { - RXE_FW_VER = 0, RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_VENDOR_ID = 0, - RXE_VENDOR_PART_ID = 0, - RXE_HW_VER = 0, RXE_MAX_QP = 0x10000, RXE_MAX_QP_WR = 0x4000, RXE_MAX_INLINE_DATA = 400, @@ -87,21 +83,12 @@ enum rxe_device_param { RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, - RXE_MAX_EE_RD_ATOM = 0, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, - RXE_MAX_EE_INIT_RD_ATOM = 0, - RXE_MAX_EE = 0, - RXE_MAX_RDD = 0, - RXE_MAX_MW = 0, - RXE_MAX_RAW_IPV6_QP = 0, - RXE_MAX_RAW_ETHY_QP = 0, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, RXE_MAX_AH = 100, - RXE_MAX_FMR = 0, - RXE_MAX_MAP_PER_FMR = 0, RXE_MAX_SRQ = 960, RXE_MAX_SRQ_WR = 0x4000, RXE_MIN_SRQ_WR = 1, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 623129f27f5a..9dd4bd7aea92 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,6 +106,10 @@ static int rxe_modify_device(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) + return -EOPNOTSUPP; + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); @@ -1171,6 +1175,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dev->dev.dma_parms = &rxe->dma_parms; + rxe->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5c4b2239129c..95834206c80c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -384,6 +384,7 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; + struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index dba4535494ab..b939f489cd46 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -70,6 +70,7 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; + struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; @@ -98,18 +99,9 @@ struct siw_device { struct work_struct netdev_down; }; -struct siw_uobj { - void *addr; - u32 size; -}; - struct siw_ucontext { struct ib_ucontext base_ucontext; struct siw_device *sdev; - - /* xarray of user mappable objects */ - struct xarray xa; - u32 uobj_nextkey; }; /* @@ -149,8 +141,6 @@ struct siw_pbl { struct siw_pble pbe[1]; }; -struct siw_mr; - /* * Generic memory representation for registered siw memory. * Memory lookup always via higher 24 bit of STag (STag index). @@ -220,7 +210,7 @@ struct siw_cq { u32 cq_get; u32 num_cqe; bool kernel_verbs; - u32 xa_cq_index; /* mmap information for CQE array */ + struct rdma_user_mmap_entry *cq_entry; /* mmap info for CQE array */ u32 id; /* For debugging only */ }; @@ -263,7 +253,7 @@ struct siw_srq { u32 rq_put; u32 rq_get; u32 num_rqe; /* max # of wqe's allowed */ - u32 xa_srq_index; /* mmap information for SRQ array */ + struct rdma_user_mmap_entry *srq_entry; /* mmap info for SRQ array */ char armed; /* inform user if limit hit */ char kernel_verbs; /* '1' if kernel client */ }; @@ -477,8 +467,8 @@ struct siw_qp { u8 layer : 4, etype : 4; u8 ecode; } term_info; - u32 xa_sq_index; /* mmap information for SQE array */ - u32 xa_rq_index; /* mmap information for RQE array */ + struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ + struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ struct rcu_head rcu; }; @@ -503,6 +493,11 @@ struct iwarp_msg_info { int (*rx_data)(struct siw_qp *qp); }; +struct siw_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + void *address; +}; + /* Global siw parameters. Currently set in siw_main.c */ extern const bool zcopy_tx; extern const bool try_gso; @@ -607,6 +602,12 @@ static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) return container_of(base_mr, struct siw_mr, base_mr); } +static inline struct siw_user_mmap_entry * +to_siw_mmap_entry(struct rdma_user_mmap_entry *rdma_mmap) +{ + return container_of(rdma_mmap, struct siw_user_mmap_entry, rdma_entry); +} + static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id) { struct siw_qp *qp; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 8c1931a57f4a..3bccfef40e7e 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1373,22 +1373,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) rv = -EINVAL; goto error; } - if (v4) - siw_dbg_qp(qp, - "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - pd_len, - &((struct sockaddr_in *)(laddr))->sin_addr, - ntohs(((struct sockaddr_in *)(laddr))->sin_port), - &((struct sockaddr_in *)(raddr))->sin_addr, - ntohs(((struct sockaddr_in *)(raddr))->sin_port)); - else - siw_dbg_qp(qp, - "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - pd_len, - &((struct sockaddr_in6 *)(laddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), - &((struct sockaddr_in6 *)(raddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port)); + siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr, + raddr); rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) @@ -1867,14 +1853,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - if (addr_family == AF_INET) - siw_dbg(id->device, "Listen at laddr %pI4 %u\n", - &(((struct sockaddr_in *)laddr)->sin_addr), - ((struct sockaddr_in *)laddr)->sin_port); - else - siw_dbg(id->device, "Listen at laddr %pI6 %u\n", - &(((struct sockaddr_in6 *)laddr)->sin6_addr), - ((struct sockaddr_in6 *)laddr)->sin6_port); + siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); return 0; @@ -1935,7 +1914,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) /* * siw_create_listen - Create resources for a listener's IWCM ID @id * - * Listens on the socket addresses id->local_addr and id->remote_addr. + * Listens on the socket address id->local_addr. * * If the listener's @id provides a specific local IP address, at most one * listening socket is created and associated with @id. @@ -1959,7 +1938,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) */ if (id->local_addr.ss_family == AF_INET) { struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr, *s_raddr; + struct sockaddr_in s_laddr; const struct in_ifaddr *ifa; if (!in_dev) { @@ -1967,12 +1946,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) goto out; } memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - s_raddr = (struct sockaddr_in *)&id->remote_addr; - siw_dbg(id->device, - "laddr %pI4:%d, raddr %pI4:%d\n", - &s_laddr.sin_addr, ntohs(s_laddr.sin_port), - &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1992,17 +1967,13 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } else if (id->local_addr.ss_family == AF_INET6) { struct inet6_dev *in6_dev = in6_dev_get(dev); struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), - *s_raddr = &to_sockaddr_in6(id->remote_addr); + struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); if (!in6_dev) { rv = -ENODEV; goto out; } - siw_dbg(id->device, - "laddr %pI6:%d, raddr %pI6:%d\n", - &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), - &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 05a92f997f60..c147f0613d95 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/dma-mapping.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/rdma_netlink.h> @@ -248,24 +249,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id) return NULL; } -static void siw_verbs_sq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_sq_flush(qp); - up_write(&qp->state_lock); -} - -static void siw_verbs_rq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_rq_flush(qp); - up_write(&qp->state_lock); -} - static const struct ib_device_ops siw_device_ops = { .owner = THIS_MODULE, .uverbs_abi_ver = SIW_ABI_VERSION, @@ -284,8 +267,6 @@ static const struct ib_device_ops siw_device_ops = { .destroy_cq = siw_destroy_cq, .destroy_qp = siw_destroy_qp, .destroy_srq = siw_destroy_srq, - .drain_rq = siw_verbs_rq_flush, - .drain_sq = siw_verbs_sq_flush, .get_dma_mr = siw_get_dma_mr, .get_port_immutable = siw_get_port_immutable, .iw_accept = siw_accept, @@ -298,6 +279,7 @@ static const struct ib_device_ops siw_device_ops = { .iw_rem_ref = siw_qp_put_ref, .map_mr_sg = siw_map_mr_sg, .mmap = siw_mmap, + .mmap_free = siw_mmap_free, .modify_qp = siw_verbs_modify_qp, .modify_srq = siw_modify_srq, .poll_cq = siw_poll_cq, @@ -350,15 +332,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; if (netdev->type != ARPHRD_LOOPBACK) { - memcpy(&base_dev->node_guid, netdev->dev_addr, 6); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + netdev->dev_addr); } else { /* * The loopback device does not have a HW address, * but connection mangagement lib expects gid != 0 */ - size_t gidlen = min_t(size_t, strlen(base_dev->name), 6); + size_t len = min_t(size_t, strlen(base_dev->name), 6); + char addr[6] = { }; - memcpy(&base_dev->node_guid, base_dev->name, gidlen); + memcpy(addr, base_dev->name, len); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + addr); } base_dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -397,6 +383,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; base_dev->dev.dma_ops = &dma_virt_ops; + base_dev->dev.dma_parms = &sdev->dma_parms; + sdev->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); ib_set_device_ops(base_dev, &siw_device_ops); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 430314c8abd9..b4317480cee7 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) @@ -1305,6 +1312,7 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) void siw_free_qp(struct kref *ref) { struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref); + struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp); struct siw_device *sdev = qp->sdev; unsigned long flags; @@ -1327,4 +1335,5 @@ void siw_free_qp(struct kref *ref) atomic_dec(&sdev->num_qp); siw_dbg_qp(qp, "free QP\n"); kfree_rcu(qp, rcu); + kfree(siw_base_qp); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 869e02b69a01..5fd6d6499b3d 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -34,44 +34,19 @@ static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { [IB_QPS_ERR] = "ERR" }; -static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size) +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct siw_uobj *uobj; - struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY); - u32 key; + struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry); - uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return SIW_INVAL_UOBJ_KEY; - - if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey, - GFP_KERNEL) < 0) { - kfree(uobj); - return SIW_INVAL_UOBJ_KEY; - } - uobj->size = PAGE_ALIGN(size); - uobj->addr = vaddr; - - return key; -} - -static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx, - unsigned long off, u32 size) -{ - struct siw_uobj *uobj = xa_load(&uctx->xa, off); - - if (uobj && uobj->size == size) - return uobj; - - return NULL; + kfree(entry); } int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { struct siw_ucontext *uctx = to_siw_ctx(ctx); - struct siw_uobj *uobj; - unsigned long off = vma->vm_pgoff; - int size = vma->vm_end - vma->vm_start; + size_t size = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct siw_user_mmap_entry *entry; int rv = -EINVAL; /* @@ -79,18 +54,25 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) */ if (vma->vm_start & (PAGE_SIZE - 1)) { pr_warn("siw: mmap not page aligned\n"); - goto out; + return -EINVAL; + } + rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma); + if (!rdma_entry) { + siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n", + vma->vm_pgoff, size); + return -EINVAL; } - uobj = siw_get_uobj(uctx, off, size); - if (!uobj) { - siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n", - off, size); + entry = to_siw_mmap_entry(rdma_entry); + + rv = remap_vmalloc_range(vma, entry->address, 0); + if (rv) { + pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, + size); goto out; } - rv = remap_vmalloc_range(vma, uobj->addr, 0); - if (rv) - pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size); out: + rdma_user_mmap_entry_put(rdma_entry); + return rv; } @@ -105,8 +87,6 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) rv = -ENOMEM; goto err_out; } - xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC); - ctx->uobj_nextkey = 0; ctx->sdev = sdev; uresp.dev_id = sdev->vendor_part_id; @@ -135,19 +115,7 @@ err_out: void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) { struct siw_ucontext *uctx = to_siw_ctx(base_ctx); - void *entry; - unsigned long index; - /* - * Make sure all user mmap objects are gone. Since QP, CQ - * and SRQ destroy routines destroy related objects, nothing - * should be found here. - */ - xa_for_each(&uctx->xa, index, entry) { - kfree(xa_erase(&uctx->xa, index)); - pr_warn("siw: dropping orphaned uobj at %lu\n", index); - } - xa_destroy(&uctx->xa); atomic_dec(&uctx->sdev->num_ctx); } @@ -293,6 +261,33 @@ void siw_qp_put_ref(struct ib_qp *base_qp) siw_qp_put(to_siw_qp(base_qp)); } +static struct rdma_user_mmap_entry * +siw_mmap_entry_insert(struct siw_ucontext *uctx, + void *address, size_t length, + u64 *offset) +{ + struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int rv; + + *offset = SIW_INVAL_UOBJ_KEY; + if (!entry) + return NULL; + + entry->address = address; + + rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext, + &entry->rdma_entry, + length); + if (rv) { + kfree(entry); + return NULL; + } + + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + /* * siw_create_qp() * @@ -317,6 +312,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; + size_t length; siw_dbg(base_dev, "create new QP\n"); @@ -380,8 +376,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, spin_lock_init(&qp->orq_lock); qp->kernel_verbs = !udata; - qp->xa_sq_index = SIW_INVAL_UOBJ_KEY; - qp->xa_rq_index = SIW_INVAL_UOBJ_KEY; rv = siw_qp_add(sdev, qp); if (rv) @@ -458,22 +452,27 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, uresp.qp_id = qp_id(qp); if (qp->sendq) { - qp->xa_sq_index = - siw_create_uobj(uctx, qp->sendq, - num_sqe * sizeof(struct siw_sqe)); + length = num_sqe * sizeof(struct siw_sqe); + qp->sq_entry = + siw_mmap_entry_insert(uctx, qp->sendq, + length, &uresp.sq_key); + if (!qp->sq_entry) { + rv = -ENOMEM; + goto err_out_xa; + } } + if (qp->recvq) { - qp->xa_rq_index = - siw_create_uobj(uctx, qp->recvq, - num_rqe * sizeof(struct siw_rqe)); - } - if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY || - qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) { - rv = -ENOMEM; - goto err_out_xa; + length = num_rqe * sizeof(struct siw_rqe); + qp->rq_entry = + siw_mmap_entry_insert(uctx, qp->recvq, + length, &uresp.rq_key); + if (!qp->rq_entry) { + uresp.sq_key = SIW_INVAL_UOBJ_KEY; + rv = -ENOMEM; + goto err_out_xa; + } } - uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT; - uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; @@ -501,11 +500,10 @@ err_out: kfree(siw_base_qp); if (qp) { - if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); - + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } vfree(qp->sendq); vfree(qp->recvq); kfree(qp); @@ -604,7 +602,6 @@ out: int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); - struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); @@ -619,10 +616,10 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->attrs.flags |= SIW_QP_IN_DESTROY; qp->rx_stream.rx_suspend = 1; - if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } down_write(&qp->state_lock); @@ -641,7 +638,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); - kfree(siw_base_qp); return 0; } @@ -687,6 +683,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, return bytes; } +/* Complete SQ WR's without processing */ +static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct siw_sqe sqe = {}; + int rv = 0; + + while (wr) { + sqe.id = wr->wr_id; + sqe.opcode = wr->opcode; + rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + +/* Complete RQ WR's without processing */ +static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_rqe rqe = {}; + int rv = 0; + + while (wr) { + rqe.id = wr->wr_id; + rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + /* * siw_post_send() * @@ -705,26 +742,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, unsigned long flags; int rv = 0; + if (wr && !qp->kernel_verbs) { + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); - return -ENOTCONN; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_sq() call with + * a concurrent request to set the QP state + * to ERROR. + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. SQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_sq(). + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); - return -ENOTCONN; - } - if (wr && !qp->kernel_verbs) { - siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } spin_lock_irqsave(&qp->sq_lock, flags); @@ -919,24 +984,54 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, *bad_wr = wr; return -EOPNOTSUPP; /* what else from errno.h? */ } + if (!qp->kernel_verbs) { + siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - return -ENOTCONN; - } - if (!qp->kernel_verbs) { - siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_rq() call with + * a concurrent request to set the QP state + * to ERROR. + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (qp->attrs.state > SIW_QP_STATE_RTS) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. RQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_rq(). + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } /* * Serialize potentially multiple producers. @@ -993,8 +1088,8 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) siw_cq_flush(cq); - if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); atomic_dec(&sdev->num_cq); @@ -1031,7 +1126,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, size = roundup_pow_of_two(size); cq->base_cq.cqe = size; cq->num_cqe = size; - cq->xa_cq_index = SIW_INVAL_UOBJ_KEY; if (!udata) { cq->kernel_verbs = 1; @@ -1057,16 +1151,17 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); + size_t length = size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl); - cq->xa_cq_index = - siw_create_uobj(ctx, cq->queue, - size * sizeof(struct siw_cqe) + - sizeof(struct siw_cq_ctrl)); - if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) { + cq->cq_entry = + siw_mmap_entry_insert(ctx, cq->queue, + length, &uresp.cq_key); + if (!cq->cq_entry) { rv = -ENOMEM; goto err_out; } - uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT; + uresp.cq_id = cq->id; uresp.num_cqe = size; @@ -1087,8 +1182,8 @@ err_out: struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); vfree(cq->queue); } atomic_dec(&sdev->num_cq); @@ -1492,7 +1587,6 @@ int siw_create_srq(struct ib_srq *base_srq, } srq->max_sge = attrs->max_sge; srq->num_rqe = roundup_pow_of_two(attrs->max_wr); - srq->xa_srq_index = SIW_INVAL_UOBJ_KEY; srq->limit = attrs->srq_limit; if (srq->limit) srq->armed = 1; @@ -1511,15 +1605,16 @@ int siw_create_srq(struct ib_srq *base_srq, } if (udata) { struct siw_uresp_create_srq uresp = {}; + size_t length = srq->num_rqe * sizeof(struct siw_rqe); - srq->xa_srq_index = siw_create_uobj( - ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); - - if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) { + srq->srq_entry = + siw_mmap_entry_insert(ctx, srq->recvq, + length, &uresp.srq_key); + if (!srq->srq_entry) { rv = -ENOMEM; goto err_out; } - uresp.srq_key = srq->xa_srq_index; + uresp.num_rqe = srq->num_rqe; if (udata->outlen < sizeof(uresp)) { @@ -1538,8 +1633,8 @@ int siw_create_srq(struct ib_srq *base_srq, err_out: if (srq->recvq) { - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); } atomic_dec(&sdev->num_srq); @@ -1625,9 +1720,8 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); - + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1910869281cb..1a731989fad6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -83,6 +83,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry); void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); void siw_cq_event(struct siw_cq *cq, enum ib_event_type type); void siw_srq_event(struct siw_srq *srq, enum ib_event_type type); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ac0583ff280d..e5f438ab716c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2019,6 +2019,15 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type); } +static int ipoib_get_vf_guid(struct net_device *dev, int vf, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid); +} + static int ipoib_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats) { @@ -2045,6 +2054,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_link_state = ipoib_set_vf_link_state, .ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_stats = ipoib_get_vf_stats, + .ndo_get_vf_guid = ipoib_get_vf_guid, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2e72fc5af157..3690e28cc7ea 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -646,13 +646,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ib_conn->pi_support) { u32 sig_caps = ib_dev->attrs.sig_prot_cap; + shost->sg_prot_tablesize = shost->sg_tablesize; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); } if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - shost->virt_boundary_mask = ~MASK_4K; + shost->virt_boundary_mask = SZ_4K - 1; if (iscsi_host_add(shost, ib_dev->dev.parent)) { mutex_unlock(&iser_conn->state_mutex); @@ -785,7 +786,7 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, * iscsi_iser_ep_connect() - Initiate iSER connection establishment * @shost: scsi_host * @dst_addr: destination address - * @non-blocking: indicate if routine can block + * @non_blocking: indicate if routine can block * * Allocate an iscsi endpoint, an iser_conn structure and bind them. * After that start RDMA connection establishment via rdma_cm. We diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 52ce63592dcf..029c00163442 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -96,16 +96,12 @@ #define iser_err(fmt, arg...) \ pr_err(PFX "%s: " fmt, __func__ , ## arg) -#define SHIFT_4K 12 -#define SIZE_4K (1ULL << SHIFT_4K) -#define MASK_4K (~(SIZE_4K-1)) - /* Default support is 512KB I/O size */ #define ISER_DEF_MAX_SECTORS 1024 #define ISCSI_ISER_DEF_SG_TABLESIZE \ - ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> SHIFT_4K) + ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K)) /* Maximum support is 16MB I/O size */ -#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> SHIFT_4K) +#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K)) #define ISER_DEF_XMIT_CMDS_DEFAULT 512 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT @@ -232,15 +228,16 @@ enum iser_desc_type { * @iser_header: iser header * @iscsi_header: iscsi header * @type: command/control/dataout - * @dam_addr: header buffer dma_address + * @dma_addr: header buffer dma_address * @tx_sg: sg[0] points to iser/iscsi headers * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task + * @cqe: completion handler * @mapped: Is the task header mapped - * reg_wr: registration WR - * send_wr: send WR - * inv_wr: invalidate WR + * @reg_wr: registration WR + * @send_wr: send WR + * @inv_wr: invalidate WR */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -267,6 +264,7 @@ struct iser_tx_desc { * @data: received data segment * @dma_addr: receive buffer dma address * @rx_sg: ib_sge of receive buffer + * @cqe: completion handler * @pad: for sense data TODO: Modify to maximum sense length supported */ struct iser_rx_desc { @@ -283,9 +281,9 @@ struct iser_rx_desc { * struct iser_login_desc - iSER login descriptor * * @req: pointer to login request buffer - * @resp: pointer to login response buffer + * @rsp: pointer to login response buffer * @req_dma: DMA address of login request buffer - * @rsp_dma: DMA address of login response buffer + * @rsp_dma: DMA address of login response buffer * @sge: IB sge for login post recv * @cqe: completion handler */ @@ -315,12 +313,12 @@ struct iser_comp { }; /** - * struct iser_device - Memory registration operations + * struct iser_reg_ops - Memory registration operations * per-device registration schemes * * @alloc_reg_res: Allocate registration resources * @free_reg_res: Free registration resources - * @fast_reg_mem: Register memory buffers + * @reg_mem: Register memory buffers * @unreg_mem: Un-register memory buffers * @reg_desc_get: Get a registration descriptor for pool * @reg_desc_put: Get a registration descriptor to pool @@ -369,7 +367,7 @@ struct iser_device { }; /** - * struct iser_reg_resources - Fast registration recources + * struct iser_reg_resources - Fast registration resources * * @mr: memory region * @fmr_pool: pool of fmrs @@ -402,7 +400,7 @@ struct iser_fr_desc { }; /** - * struct iser_fr_pool: connection fast registration pool + * struct iser_fr_pool - connection fast registration pool * * @list: list of fastreg descriptors * @lock: protects fmr/fastreg pool @@ -427,6 +425,7 @@ struct iser_fr_pool { * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support + * @reg_cqe: completion handler */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -467,6 +466,7 @@ struct ib_conn { * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize * @pages_per_mr: maximum pages available for registration + * @snd_w_inv: connection uses remote invalidation */ struct iser_conn { struct ib_conn ib_conn; @@ -525,7 +525,7 @@ struct iser_page_vec { }; /** - * struct iser_global: iSER global context + * struct iser_global - iSER global context * * @device_list_mutex: protects device_list * @device_list: iser devices global list diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5cbb4b3a0566..4a7045bb0831 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -358,6 +358,8 @@ static inline bool iser_signal_comp(u8 sig_count) /** * iser_send_command - send command PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task */ int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) @@ -429,6 +431,9 @@ send_command_error: /** * iser_send_data_out - send data out PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task + * @hdr: pointer to the LLD's iSCSI message header */ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2cc89a9b9e9b..0f74dc6d12fa 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -170,7 +170,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, dev = iser_task->iser_conn->ib_conn.device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir); - if (data->dma_nents == 0) { + if (unlikely(data->dma_nents == 0)) { iser_err("dma_map_sg failed!!!\n"); return -EINVAL; } @@ -237,7 +237,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, int ret, plen; page_vec->npages = 0; - page_vec->fake_mr.page_size = SIZE_4K; + page_vec->fake_mr.page_size = SZ_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, mem->dma_nents, NULL, iser_set_page); if (unlikely(plen < mem->dma_nents)) { @@ -451,7 +451,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SZ_4K); if (unlikely(n != mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", n, mem->dma_nents); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a6548de0e218..1f4a37a3c2b3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -58,12 +58,12 @@ static void iser_event_handler(struct ib_event_handler *handler, dev_name(&event->device->dev), event->element.port_num); } -/** +/* * iser_create_device_ib_res - creates Protection Domain (PD), Completion * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with - * the adapator. + * the adaptor. * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_device_ib_res(struct iser_device *device) { @@ -124,9 +124,9 @@ comps_err: return -1; } -/** +/* * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, - * CQ and PD created with the device associated with the adapator. + * CQ and PD created with the device associated with the adaptor. */ static void iser_free_device_ib_res(struct iser_device *device) { @@ -149,8 +149,11 @@ static void iser_free_device_ib_res(struct iser_device *device) /** * iser_alloc_fmr_pool - Creates FMR pool and page_vector + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request * - * returns 0 on success, or errno code on failure + * Return: 0 on success, or errno code on failure */ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -180,7 +183,7 @@ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, page_vec->pages = (u64 *)(page_vec + 1); - params.page_shift = SHIFT_4K; + params.page_shift = ilog2(SZ_4K); params.max_pages_per_fmr = size; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ @@ -215,6 +218,7 @@ err_frpl: /** * iser_free_fmr_pool - releases the FMR pool and page vec + * @ib_conn: connection RDMA resources */ void iser_free_fmr_pool(struct ib_conn *ib_conn) { @@ -295,7 +299,11 @@ static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc) /** * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. - * returns 0 on success, or errno code on failure + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request + * + * Return: 0 on success, or errno code on failure */ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -332,6 +340,7 @@ err: /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors + * @ib_conn: connection RDMA resources */ void iser_free_fastreg_pool(struct ib_conn *ib_conn) { @@ -355,10 +364,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) fr_pool->size - i); } -/** +/* * iser_create_ib_conn_res - Queue-Pair (QP) * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { @@ -436,7 +445,7 @@ out_err: return ret; } -/** +/* * based on the resolved device node GUID see if there already allocated * device for this device. If there's no such, create one. */ @@ -487,9 +496,9 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -/** +/* * Called with state mutex held - **/ + */ static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, enum iser_conn_state comp, enum iser_conn_state exch) @@ -561,7 +570,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, } /** - * Frees all conn objects and deallocs conn descriptor + * iser_conn_release - Frees all conn objects and deallocs conn descriptor + * @iser_conn: iSER connection context */ void iser_conn_release(struct iser_conn *iser_conn) { @@ -595,7 +605,10 @@ void iser_conn_release(struct iser_conn *iser_conn) } /** - * triggers start of the disconnect procedures and wait for them to be done + * iser_conn_terminate - triggers start of the disconnect procedures and + * waits for them to be done + * @iser_conn: iSER connection context + * * Called with state mutex held */ int iser_conn_terminate(struct iser_conn *iser_conn) @@ -632,9 +645,9 @@ int iser_conn_terminate(struct iser_conn *iser_conn) return 1; } -/** +/* * Called with state mutex held - **/ + */ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *iser_conn; @@ -670,7 +683,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, else max_num_sg = attr->max_fast_reg_page_list_len; - sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); + sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K); if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) sup_sg_tablesize = min_t( @@ -684,9 +697,9 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, iser_conn->scsi_sg_tablesize + reserved_mr_pages; } -/** +/* * Called with state mutex held - **/ + */ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; @@ -732,9 +745,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } -/** +/* * Called with state mutex held - **/ + */ static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; @@ -1019,7 +1032,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) ib_conn->post_recv_buf_count += count; ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); - if (ib_ret) { + if (unlikely(ib_ret)) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else @@ -1030,9 +1043,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) /** - * iser_start_send - Initiate a Send DTO operation + * iser_post_send - Initiate a Send DTO operation + * @ib_conn: connection RDMA resources + * @tx_desc: iSER TX descriptor + * @signal: true to send work request as SIGNALED * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) @@ -1060,7 +1076,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, first_wr = wr; ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL); - if (ib_ret) + if (unlikely(ib_ret)) iser_err("ib_post_send failed, ret:%d opcode:%d\n", ib_ret, wr->opcode); @@ -1081,7 +1097,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, ret = ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { - pr_err("ib_check_mr_status failed, ret %d\n", ret); + iser_err("ib_check_mr_status failed, ret %d\n", ret); /* Not a lot we can do, return ambiguous guard error */ *sector = 0; return 0x1; @@ -1093,7 +1109,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, sector_div(sector_off, sector_size + 8); *sector = scsi_get_lba(iser_task->sc) + sector_off; - pr_err("PI error found type %d at sector %llx " + iser_err("PI error found type %d at sector %llx " "expected %x vs actual %x\n", mr_status.sig_err.err_type, (unsigned long long)*sector, diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index 43ac61ffef4a..6dbc08e1a6a6 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -70,7 +70,7 @@ struct opa_vnic_adapter; -/** +/* * struct __opa_vesw_info - OPA vnic virtual switch info * * Same as opa_vesw_info without bitwise attribute. @@ -96,7 +96,7 @@ struct __opa_vesw_info { u8 rsvd4[2]; } __packed; -/** +/* * struct __opa_per_veswport_info - OPA vnic per port info * * Same as opa_per_veswport_info without bitwise attribute. @@ -136,7 +136,7 @@ struct __opa_per_veswport_info { u8 rsvd3[8]; } __packed; -/** +/* * struct __opa_veswport_info - OPA vnic port info * * Same as opa_veswport_info without bitwise attribute. @@ -146,7 +146,7 @@ struct __opa_veswport_info { struct __opa_per_veswport_info vport; }; -/** +/* * struct __opa_veswport_trap - OPA vnic trap info * * Same as opa_veswport_trap without bitwise attribute. diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b5960351bec0..b7f7a5f7bd98 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -174,9 +174,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, "%d", tmo); + return sprintf(buffer, "%d\n", tmo); else - return sprintf(buffer, "off"); + return sprintf(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) @@ -352,11 +352,11 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) init_completion(&ch->done); ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? - (struct sockaddr *)&target->rdma_cm.src : NULL, - (struct sockaddr *)&target->rdma_cm.dst, + &target->rdma_cm.src.sa : NULL, + &target->rdma_cm.dst.sa, SRP_PATH_REC_TIMEOUT_MS); if (ret) { - pr_err("No route available from %pIS to %pIS (%d)\n", + pr_err("No route available from %pISpsc to %pISpsc (%d)\n", &target->rdma_cm.src, &target->rdma_cm.dst, ret); goto out; } @@ -366,7 +366,7 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) ret = ch->status; if (ret) { - pr_err("Resolving address %pIS failed (%d)\n", + pr_err("Resolving address %pISpsc failed (%d)\n", &target->rdma_cm.dst, ret); goto out; } @@ -552,6 +552,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + const struct ib_device_attr *attr = &dev->dev->attrs; struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; @@ -583,12 +584,14 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; init_attr->cap.max_recv_sge = 1; - init_attr->cap.max_send_sge = SRP_MAX_SGE; + init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge); init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; init_attr->recv_cq = recv_cq; + ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U); + if (target->using_rdma_cm) { ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); qp = ch->rdma_cm.cm_id->qp; @@ -1362,7 +1365,8 @@ static void srp_terminate_io(struct srp_rport *rport) } /* Calculate maximum initiator to target information unit length. */ -static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data, + uint32_t max_it_iu_size) { uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + sizeof(struct srp_indirect_buf) + @@ -1372,6 +1376,11 @@ static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + srp_max_imm_data); + if (max_it_iu_size) + max_iu_len = min(max_iu_len, max_it_iu_size); + + pr_debug("max_iu_len = %d\n", max_iu_len); + return max_iu_len; } @@ -1389,7 +1398,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) struct srp_target_port *target = rport->lld_data; struct srp_rdma_ch *ch; uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - srp_use_imm_data); + srp_use_imm_data, + target->max_it_iu_size); int i, j, ret = 0; bool multich = false; @@ -1838,7 +1848,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, return -EIO; if (ch->use_imm_data && - count <= SRP_MAX_IMM_SGE && + count <= ch->max_imm_sge && SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && scmnd->sc_data_direction == DMA_TO_DEVICE) { struct srp_imm_buf *buf; @@ -2538,7 +2548,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - ch->use_imm_data); + ch->use_imm_data, + target->max_it_iu_size); WARN_ON_ONCE(ch->max_it_iu_len > be32_to_cpu(lrsp->max_it_iu_len)); @@ -3411,6 +3422,7 @@ enum { SRP_OPT_IP_SRC = 1 << 15, SRP_OPT_IP_DEST = 1 << 16, SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, + SRP_OPT_MAX_IT_IU_SIZE = 1 << 18, }; static unsigned int srp_opt_mandatory[] = { @@ -3443,6 +3455,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, { SRP_OPT_IP_SRC, "src=%s" }, { SRP_OPT_IP_DEST, "dest=%s" }, + { SRP_OPT_MAX_IT_IU_SIZE, "max_it_iu_size=%d" }, { SRP_OPT_ERR, NULL } }; @@ -3736,6 +3749,14 @@ static int srp_parse_options(struct net *net, const char *buf, target->tl_retry_count = token; break; + case SRP_OPT_MAX_IT_IU_SIZE: + if (match_int(args, &token) || token < 0) { + pr_warn("bad maximum initiator to target IU size '%s'\n", p); + goto out; + } + target->max_it_iu_size = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); @@ -3887,7 +3908,9 @@ static ssize_t srp_create_target(struct device *dev, target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); - max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data, + target->max_it_iu_size); INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index b2861cd2087a..5359ece561ca 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -161,6 +161,7 @@ struct srp_rdma_ch { }; uint32_t max_it_iu_len; uint32_t max_ti_iu_len; + u8 max_imm_sge; bool use_imm_data; /* Everything above this point is used in the hot path of @@ -209,6 +210,7 @@ struct srp_target_port { u32 ch_count; u32 lkey; enum srp_target_state state; + uint32_t max_it_iu_size; unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; @@ -245,11 +247,13 @@ struct srp_target_port { union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } src; union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } dst; bool src_specified; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e25c70a56be6..23c782e3d49a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -556,34 +556,41 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof(port_modify)); - port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - port_modify.clr_port_cap_mask = 0; - - ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - if (ret) - goto err_mod_port; - ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); if (ret) - goto err_query_port; + return ret; sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) - goto err_query_port; + return ret; - sport->port_guid_wwn.priv = sport; - srpt_format_guid(sport->port_guid, sizeof(sport->port_guid), + sport->port_guid_id.wwn.priv = sport; + srpt_format_guid(sport->port_guid_id.name, + sizeof(sport->port_guid_id.name), &sport->gid.global.interface_id); - sport->port_gid_wwn.priv = sport; - snprintf(sport->port_gid, sizeof(sport->port_gid), + sport->port_gid_id.wwn.priv = sport; + snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name), "0x%016llx%016llx", be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); + if (rdma_protocol_iwarp(sport->sdev->device, sport->port)) + return 0; + + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) { + pr_warn("%s-%d: enabling device management failed (%d). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, ret); + return 0; + } + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -599,23 +606,14 @@ static int srpt_refresh_port(struct srpt_port *sport) srpt_mad_recv_handler, sport, 0); if (IS_ERR(sport->mad_agent)) { - ret = PTR_ERR(sport->mad_agent); + pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, + PTR_ERR(sport->mad_agent)); sport->mad_agent = NULL; - goto err_query_port; } } return 0; - -err_query_port: - - port_modify.set_port_cap_mask = 0; - port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - -err_mod_port: - - return ret; } /** @@ -1364,9 +1362,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, u64 tag, int status) { + struct se_cmd *cmd = &ioctx->cmd; struct srp_rsp *srp_rsp; const u8 *sense_data; int sense_data_len, max_sense_len; + u32 resid = cmd->residual_count; /* * The lowest bit of all SAM-3 status codes is zero (see also @@ -1388,6 +1388,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->tag = tag; srp_rsp->status = status; + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOUNDER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIUNDER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOOVER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIOVER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } + if (sense_data_len) { BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); @@ -1931,41 +1953,22 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) return ret; } -static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch) -{ - struct srpt_nexus *nexus; - struct srpt_rdma_ch *ch2; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) { - list_for_each_entry(ch2, &nexus->ch_list, list) { - if (ch2 == ch) { - res = false; - goto done; - } - } - } -done: - rcu_read_unlock(); - - return res; -} - /* Send DREQ and wait for DREP. */ static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) { + DECLARE_COMPLETION_ONSTACK(closed); struct srpt_port *sport = ch->sport; pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, ch->state); + ch->closed = &closed; + mutex_lock(&sport->mutex); srpt_disconnect_ch(ch); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch), - 5 * HZ) == 0) + while (wait_for_completion_timeout(&closed, 5 * HZ) == 0) pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, ch->sess_name, ch->qp->qp_num, ch->state); @@ -2045,10 +2048,17 @@ static void srpt_set_enabled(struct srpt_port *sport, bool enabled) __srpt_close_all_ch(sport); } +static void srpt_drop_sport_ref(struct srpt_port *sport) +{ + if (atomic_dec_return(&sport->refcount) == 0 && sport->freed_channels) + complete(sport->freed_channels); +} + static void srpt_free_ch(struct kref *kref) { struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + srpt_drop_sport_ref(ch->sport); kfree_rcu(ch, rcu); } @@ -2092,6 +2102,9 @@ static void srpt_release_channel_work(struct work_struct *w) list_del_rcu(&ch->list); mutex_unlock(&sport->mutex); + if (ch->closed) + complete(ch->closed); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2106,8 +2119,6 @@ static void srpt_release_channel_work(struct work_struct *w) kmem_cache_destroy(ch->req_buf_cache); - wake_up(&sport->ch_releaseQ); - kref_put(&ch->kref, srpt_free_ch); } @@ -2144,6 +2155,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, char i_port_id[36]; u32 it_iu_len; int i, tag_num, tag_size, ret; + struct srpt_tpg *stpg; WARN_ON_ONCE(irqs_disabled()); @@ -2296,23 +2308,38 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, be64_to_cpu(*(__be64 *)nexus->i_port_id), be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); - pr_debug("registering session %s\n", ch->sess_name); + pr_debug("registering src addr %s or i_port_id %s\n", ch->sess_name, + i_port_id); tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - if (sport->port_guid_tpg.se_tpg_wwn) - ch->sess = target_setup_session(&sport->port_guid_tpg, tag_num, + + mutex_lock(&sport->port_guid_id.mutex); + list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + } + mutex_unlock(&sport->port_guid_id.mutex); + + mutex_lock(&sport->port_gid_id.mutex); + list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); - /* Retry without leading "0x" */ - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->port_gid_id.mutex); + if (IS_ERR_OR_NULL(ch->sess)) { WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); @@ -2325,6 +2352,12 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto destroy_ib; } + /* + * Once a session has been created destruction of srpt_rdma_ch objects + * will decrement sport->refcount. Hence increment sport->refcount now. + */ + atomic_inc(&sport->refcount); + mutex_lock(&sport->mutex); if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { @@ -2505,6 +2538,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, struct srpt_device *sdev; struct srp_login_req req; const struct srp_login_req_rdma *req_rdma; + struct sa_path_rec *path_rec = cm_id->route.path_rec; char src_addr[40]; sdev = ib_get_client_data(cm_id->device, &srpt_client); @@ -2530,7 +2564,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, &cm_id->route.addr.src_addr); return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num, - cm_id->route.path_rec->pkey, &req, src_addr); + path_rec ? path_rec->pkey : 0, &req, src_addr); } static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, @@ -2906,39 +2940,29 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static bool srpt_ch_list_empty(struct srpt_port *sport) -{ - struct srpt_nexus *nexus; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) - if (!list_empty(&nexus->ch_list)) - res = false; - rcu_read_unlock(); - - return res; -} - /** * srpt_release_sport - disable login and wait for associated channels * @sport: SRPT HCA port. */ static int srpt_release_sport(struct srpt_port *sport) { + DECLARE_COMPLETION_ONSTACK(c); struct srpt_nexus *nexus, *next_n; struct srpt_rdma_ch *ch; WARN_ON_ONCE(irqs_disabled()); + sport->freed_channels = &c; + mutex_lock(&sport->mutex); srpt_set_enabled(sport, false); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, - srpt_ch_list_empty(sport), 5 * HZ) <= 0) { - pr_info("%s_%d: waiting for session unregistration ...\n", - dev_name(&sport->sdev->device->dev), sport->port); + while (atomic_read(&sport->refcount) > 0 && + wait_for_completion_timeout(&c, 5 * HZ) <= 0) { + pr_info("%s_%d: waiting for unregistration of %d sessions ...\n", + dev_name(&sport->sdev->device->dev), sport->port, + atomic_read(&sport->refcount)); rcu_read_lock(); list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { @@ -2975,10 +2999,10 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name) for (i = 0; i < dev->phys_port_cnt; i++) { sport = &sdev->port[i]; - if (strcmp(sport->port_guid, name) == 0) - return &sport->port_guid_wwn; - if (strcmp(sport->port_gid, name) == 0) - return &sport->port_gid_wwn; + if (strcmp(sport->port_guid_id.name, name) == 0) + return &sport->port_guid_id.wwn; + if (strcmp(sport->port_gid_id.name, name) == 0) + return &sport->port_gid_id.wwn; } } @@ -3147,7 +3171,6 @@ static void srpt_add_one(struct ib_device *device) for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); - init_waitqueue_head(&sport->ch_releaseQ); mutex_init(&sport->mutex); sport->sdev = sdev; sport->port = i; @@ -3156,6 +3179,10 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); + mutex_init(&sport->port_guid_id.mutex); + INIT_LIST_HEAD(&sport->port_guid_id.tpg_list); + mutex_init(&sport->port_gid_id.mutex); + INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", @@ -3258,14 +3285,23 @@ static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) return tpg->se_tpg_wwn->priv; } +static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) +{ + struct srpt_port *sport = wwn->priv; + + if (wwn == &sport->port_guid_id.wwn) + return &sport->port_guid_id; + if (wwn == &sport->port_gid_id.wwn) + return &sport->port_gid_id; + WARN_ON_ONCE(true); + return NULL; +} + static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { - struct srpt_port *sport = srpt_tpg_to_sport(tpg); + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); - WARN_ON_ONCE(tpg != &sport->port_guid_tpg && - tpg != &sport->port_gid_tpg); - return tpg == &sport->port_guid_tpg ? sport->port_guid : - sport->port_gid; + return stpg->sport_id->name; } static u16 srpt_get_tag(struct se_portal_group *tpg) @@ -3721,19 +3757,25 @@ static struct configfs_attribute *srpt_tpg_attrs[] = { static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { - struct srpt_port *sport = wwn->priv; - struct se_portal_group *tpg; - int res; - - WARN_ON_ONCE(wwn != &sport->port_guid_wwn && - wwn != &sport->port_gid_wwn); - tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg : - &sport->port_gid_tpg; - res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP); - if (res) + struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn); + struct srpt_tpg *stpg; + int res = -ENOMEM; + + stpg = kzalloc(sizeof(*stpg), GFP_KERNEL); + if (!stpg) return ERR_PTR(res); + stpg->sport_id = sport_id; + res = core_tpg_register(wwn, &stpg->tpg, SCSI_PROTOCOL_SRP); + if (res) { + kfree(stpg); + return ERR_PTR(res); + } - return tpg; + mutex_lock(&sport_id->mutex); + list_add_tail(&stpg->entry, &sport_id->tpg_list); + mutex_unlock(&sport_id->mutex); + + return &stpg->tpg; } /** @@ -3742,10 +3784,17 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, */ static void srpt_drop_tpg(struct se_portal_group *tpg) { + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + struct srpt_port_id *sport_id = stpg->sport_id; struct srpt_port *sport = srpt_tpg_to_sport(tpg); + mutex_lock(&sport_id->mutex); + list_del(&stpg->entry); + mutex_unlock(&sport_id->mutex); + sport->enabled = false; core_tpg_deregister(tpg); + kfree(stpg); } /** diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index ee9f20e9177a..2e1a69840857 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -264,6 +264,8 @@ enum rdma_ch_state { * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. * @kref: kref for this channel. + * @closed: Completion object that will be signaled as soon as a new + * channel object with the same identity can be created. * @rq_size: IB receive queue size. * @max_rsp_size: Maximum size of an RSP response message in bytes. * @sq_wr_avail: number of work requests available in the send queue. @@ -306,6 +308,7 @@ struct srpt_rdma_ch { struct ib_cqe zw_cqe; struct rcu_head rcu; struct kref kref; + struct completion *closed; int rq_size; u32 max_rsp_size; atomic_t sq_wr_avail; @@ -361,24 +364,52 @@ struct srpt_port_attrib { }; /** + * struct srpt_tpg - information about a single "target portal group" + * @entry: Entry in @sport_id->tpg_list. + * @sport_id: Port name this TPG is associated with. + * @tpg: LIO TPG data structure. + * + * Zero or more target portal groups are associated with each port name + * (srpt_port_id). With each TPG an ACL list is associated. + */ +struct srpt_tpg { + struct list_head entry; + struct srpt_port_id *sport_id; + struct se_portal_group tpg; +}; + +/** + * struct srpt_port_id - information about an RDMA port name + * @mutex: Protects @tpg_list changes. + * @tpg_list: TPGs associated with the RDMA port name. + * @wwn: WWN associated with the RDMA port name. + * @name: ASCII representation of the port name. + * + * Multiple sysfs directories can be associated with a single RDMA port. This + * data structure represents a single (port, name) pair. + */ +struct srpt_port_id { + struct mutex mutex; + struct list_head tpg_list; + struct se_wwn wwn; + char name[64]; +}; + +/** * struct srpt_port - information associated by SRPT with a single IB port * @sdev: backpointer to the HCA information. * @mad_agent: per-port management datagram processing information. * @enabled: Whether or not this target port is enabled. - * @port_guid: ASCII representation of Port GUID - * @port_gid: ASCII representation of Port GID * @port: one-based port number. * @sm_lid: cached value of the port's sm_lid. * @lid: cached value of the port's lid. * @gid: cached value of the port's gid. - * @port_acl_lock spinlock for port_acl_list: * @work: work structure for refreshing the aforementioned cached values. - * @port_guid_tpg: TPG associated with target port GUID. - * @port_guid_wwn: WWN associated with target port GUID. - * @port_gid_tpg: TPG associated with target port GID. - * @port_gid_wwn: WWN associated with target port GID. + * @port_guid_id: target port GUID + * @port_gid_id: target port GID * @port_attrib: Port attributes that can be accessed through configfs. - * @ch_releaseQ: Enables waiting for removal from nexus_list. + * @refcount: Number of objects associated with this port. + * @freed_channels: Completion that will be signaled once @refcount becomes 0. * @mutex: Protects nexus_list. * @nexus_list: Nexus list. See also srpt_nexus.entry. */ @@ -386,19 +417,16 @@ struct srpt_port { struct srpt_device *sdev; struct ib_mad_agent *mad_agent; bool enabled; - u8 port_guid[24]; - u8 port_gid[64]; u8 port; u32 sm_lid; u32 lid; union ib_gid gid; struct work_struct work; - struct se_portal_group port_guid_tpg; - struct se_wwn port_guid_wwn; - struct se_portal_group port_gid_tpg; - struct se_wwn port_gid_wwn; + struct srpt_port_id port_guid_id; + struct srpt_port_id port_gid_id; struct srpt_port_attrib port_attrib; - wait_queue_head_t ch_releaseQ; + atomic_t refcount; + struct completion *freed_channels; struct mutex mutex; struct list_head nexus_list; }; |