author:     Linus Torvalds <torvalds@linux-foundation.org>  2019-05-09 18:02:46 +0200
committer:  Linus Torvalds <torvalds@linux-foundation.org>  2019-05-09 18:02:46 +0200
commit:     dce45af5c2e9e85f22578f2f8065f225f5d11764 (patch)
tree:       e01e7a294586c3074142fb485516ce718a1a82d2
parent:     Merge tag 'dmaengine-5.2-rc1' of git://git.infradead.org/users/vkoul/slave-dma (diff)
parent:     RDMA/ipoib: Allow user space differentiate between valid dev_port (diff)
download:   linux-dce45af5c2e9e85f22578f2f8065f225f5d11764.tar.xz, linux-dce45af5c2e9e85f22578f2f8065f225f5d11764.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a smaller cycle than normal. One new driver was
accepted, which is unusual, and at least one more driver remains in
review on the list.
Summary:
- Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4,
vmw_pvrdma
- Many patches from Matthew Wilcox converting radix tree and IDR users
to use xarray (see the sketch after this summary)
- Introduction of tracepoints to the MAD layer
- Build large SGLs at the start for DMA mapping and get the driver to
split them
- Generally clean SGL handling code throughout the subsystem
- Support for restricting RDMA devices to net namespaces for
containers
- Progress to remove object allocation boilerplate code from drivers
- Change in how the mlx5 driver shows representor ports linked to VFs
- mlx5 uapi feature to access the on chip SW ICM memory
- Add a new driver for 'EFA'. This is HW that supports user space
packet processing through QPs in Amazon's cloud"
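
The IDR-to-xarray conversions called out above all follow the same basic
pattern, visible below in cm.c, cma.c and mad.c: a statically defined
allocating xarray, cyclic ID allocation, lock-free lookup, and an emptiness
check at cleanup. A minimal, kernel-style sketch of that pattern (the table
and function names here are illustrative, not taken from the tree):

#include <linux/xarray.h>

/* Illustrative names; the real tables live in cm.c, cma.c and mad.c. */
static DEFINE_XARRAY_FLAGS(id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static u32 id_next;

static int example_alloc_id(void *priv, u32 *id)
{
	/* Cyclic 32-bit allocation; replaces idr_alloc_cyclic() plus its
	 * external spinlock. Returns 0 on success (1 if the counter wrapped).
	 */
	return xa_alloc_cyclic_irq(&id_table, id, priv, xa_limit_32b,
				   &id_next, GFP_KERNEL);
}

static void *example_lookup_id(u32 id)
{
	/* Replaces idr_find(); the xarray handles its own locking. */
	return xa_load(&id_table, id);
}

static void example_free_id(u32 id)
{
	/* Replaces idr_remove() under the old external lock. */
	xa_erase_irq(&id_table, id);
}

At module exit the conversions simply assert emptiness with
WARN_ON(!xa_empty(&id_table)) instead of calling idr_destroy(), as the cm.c
hunk in the diff below shows.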
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits)
RDMA/ipoib: Allow user space differentiate between valid dev_port
IB/core, ipoib: Do not overreact to SM LID change event
RDMA/device: Don't fire uevent before device is fully initialized
lib/scatterlist: Remove leftover from sg_page_iter comment
RDMA/efa: Add driver to Kconfig/Makefile
RDMA/efa: Add the efa module
RDMA/efa: Add EFA verbs implementation
RDMA/efa: Add common command handlers
RDMA/efa: Implement functions that submit and complete admin commands
RDMA/efa: Add the ABI definitions
RDMA/efa: Add the com service API definitions
RDMA/efa: Add the efa_com.h file
RDMA/efa: Add the efa.h header file
RDMA/efa: Add EFA device definitions
RDMA: Add EFA related definitions
RDMA/umem: Remove hugetlb flag
RDMA/bnxt_re: Use core helpers to get aligned DMA address
RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size
RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks
RDMA/umem: Add API to find best driver supported page size in an MR
...
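
A recurring technique in the cache.c changes below is deferring the netdev
reference drop to an RCU callback, so readers that dereference
gid_attr->ndev under rcu_read_lock() never race with the final dev_put().
A minimal sketch of the idea, using hypothetical names (the diff calls the
holder struct roce_gid_ndev_storage):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical holder; cache.c names this roce_gid_ndev_storage. */
struct ndev_holder {
	struct rcu_head rcu_head;
	struct net_device *ndev;
};

static void put_ndev_rcu(struct rcu_head *head)
{
	struct ndev_holder *holder =
		container_of(head, struct ndev_holder, rcu_head);

	/* By now every rcu_read_lock() reader of the old pointer is done. */
	dev_put(holder->ndev);
	kfree(holder);
}

static void release_ndev(struct ndev_holder *holder)
{
	/*
	 * The GID entry clears its ndev pointer first (rcu_assign_pointer
	 * to NULL in the diff); the reference is dropped only after a
	 * grace period has elapsed.
	 */
	call_rcu(&holder->rcu_head, put_ndev_rcu);
}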
251 files changed, 12535 insertions, 4571 deletions
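
The series also exports a new helper, rdma_read_gid_l2_fields(), whose
kernel-doc appears in the cache.c hunk below. A hedged sketch of how a RoCE
driver might consume it (the caller context is assumed, and the declaration
is taken to live in <rdma/ib_cache.h> as part of this series):

#include <linux/if_ether.h>
#include <rdma/ib_cache.h>

/* Hypothetical caller; gid_attr would come from the resolved address handle. */
static int example_fill_l2(const struct ib_gid_attr *gid_attr,
			   u8 smac[ETH_ALEN], u16 *vlan_id)
{
	int ret;

	/* 0 on success; *vlan_id is set to 0xffff when no VLAN applies. */
	ret = rdma_read_gid_l2_fields(gid_attr, vlan_id, smac);
	if (ret)
		return ret;	/* -ENODEV: no netdev behind this GID entry */

	return 0;
}

Both output pointers are optional per the kernel-doc, so a caller that only
needs the source MAC can pass NULL for vlan_id.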
diff --git a/MAINTAINERS b/MAINTAINERS index a225661d6237..ae4063ef5533 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -745,6 +745,15 @@ S: Supported F: Documentation/networking/device_drivers/amazon/ena.txt F: drivers/net/ethernet/amazon/ +AMAZON RDMA EFA DRIVER +M: Gal Pressman <galpress@amazon.com> +R: Yossi Leybovich <sleybo@amazon.com> +L: linux-rdma@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-rdma/list/ +S: Supported +F: drivers/infiniband/hw/efa/ +F: include/uapi/rdma/efa-abi.h + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky <thomas.lendacky@amd.com> M: Gary Hook <gary.hook@amd.com> @@ -4279,7 +4288,7 @@ S: Supported F: drivers/scsi/cxgbi/cxgb3i CXGB3 IWARP RNIC DRIVER (IW_CXGB3) -M: Steve Wise <swise@chelsio.com> +M: Potnuri Bharat Teja <bharat@chelsio.com> L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported @@ -4308,7 +4317,7 @@ S: Supported F: drivers/scsi/cxgbi/cxgb4i CXGB4 IWARP RNIC DRIVER (IW_CXGB4) -M: Steve Wise <swise@chelsio.com> +M: Potnuri Bharat Teja <bharat@chelsio.com> L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported @@ -7727,6 +7736,10 @@ F: drivers/infiniband/ F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: include/rdma/ +F: include/trace/events/ib_mad.h +F: include/trace/events/ib_umad.h +F: samples/bpf/ibumad_kern.c +F: samples/bpf/ibumad_user.c INGENIC JZ4780 DMA Driver M: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com> diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index d318bab25860..cbfbea49f126 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -93,6 +93,7 @@ source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" +source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 744b6ec0acb0..ba01b90c04e7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -45,6 +45,7 @@ #include <net/ipv6_stubs.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include <rdma/ib_sa.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 43c67e5f43c6..18e476b3ced0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -78,11 +78,22 @@ enum gid_table_entry_state { GID_TABLE_ENTRY_PENDING_DEL = 3, }; +struct roce_gid_ndev_storage { + struct rcu_head rcu_head; + struct net_device *ndev; +}; + struct ib_gid_table_entry { struct kref kref; struct work_struct del_work; struct ib_gid_attr attr; void *context; + /* Store the ndev pointer to release reference later on in + * call_rcu context because by that time gid_table_entry + * and attr might be already freed. So keep a copy of it. + * ndev_storage is freed by rcu callback. 
+ */ + struct roce_gid_ndev_storage *ndev_storage; enum gid_table_entry_state state; }; @@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref) queue_work(ib_wq, &entry->del_work); } +static void put_gid_ndev(struct rcu_head *head) +{ + struct roce_gid_ndev_storage *storage = + container_of(head, struct roce_gid_ndev_storage, rcu_head); + + WARN_ON(!storage->ndev); + /* At this point its safe to release netdev reference, + * as all callers working on gid_attr->ndev are done + * using this netdev. + */ + dev_put(storage->ndev); + kfree(storage); +} + static void free_gid_entry_locked(struct ib_gid_table_entry *entry) { struct ib_device *device = entry->attr.device; @@ -228,8 +253,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry) /* Now this index is ready to be allocated */ write_unlock_irq(&table->rwlock); - if (entry->attr.ndev) - dev_put(entry->attr.ndev); + if (entry->ndev_storage) + call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev); kfree(entry); } @@ -266,14 +291,25 @@ static struct ib_gid_table_entry * alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; + struct net_device *ndev; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return NULL; + + ndev = rcu_dereference_protected(attr->ndev, 1); + if (ndev) { + entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage), + GFP_KERNEL); + if (!entry->ndev_storage) { + kfree(entry); + return NULL; + } + dev_hold(ndev); + entry->ndev_storage->ndev = ndev; + } kref_init(&entry->kref); memcpy(&entry->attr, attr, sizeof(*attr)); - if (entry->attr.ndev) - dev_hold(entry->attr.ndev); INIT_WORK(&entry->del_work, free_gid_work); entry->state = GID_TABLE_ENTRY_INVALID; return entry; @@ -343,6 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct roce_gid_ndev_storage *ndev_storage; struct ib_gid_table_entry *entry; lockdep_assert_held(&table->lock); @@ -360,6 +397,13 @@ static void del_gid(struct ib_device *ib_dev, u8 port, table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); + ndev_storage = entry->ndev_storage; + if (ndev_storage) { + entry->ndev_storage = NULL; + rcu_assign_pointer(entry->attr.ndev, NULL); + call_rcu(&ndev_storage->rcu_head, put_gid_ndev); + } + if (rdma_cap_roce_gid_table(ib_dev, port)) ib_dev->ops.del_gid(&entry->attr, &entry->context); @@ -543,30 +587,11 @@ out_unlock: int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { - struct net_device *idev; - unsigned long mask; - int ret; - - idev = ib_device_get_netdev(ib_dev, port); - if (idev && attr->ndev != idev) { - union ib_gid default_gid; - - /* Adding default GIDs is not permitted */ - make_default_gid(idev, &default_gid); - if (!memcmp(gid, &default_gid, sizeof(*gid))) { - dev_put(idev); - return -EPERM; - } - } - if (idev) - dev_put(idev); - - mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV; + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_NETDEV; - ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); - return ret; + return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); } static int @@ -1263,11 +1288,72 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) read_lock_irqsave(&table->rwlock, flags); valid = is_gid_entry_valid(table->data_vec[attr->index]); - if (valid && 
attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP)) - ndev = attr->ndev; + if (valid) { + ndev = rcu_dereference(attr->ndev); + if (!ndev || + (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0))) + ndev = ERR_PTR(-ENODEV); + } read_unlock_irqrestore(&table->rwlock, flags); return ndev; } +EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu); + +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +{ + u16 *vlan_id = data; + + if (is_vlan_dev(lower_dev)) + *vlan_id = vlan_dev_vlan_id(lower_dev); + + /* We are interested only in first level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + +/** + * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address + * of a GID entry. + * + * @attr: GID attribute pointer whose L2 fields to be read + * @vlan_id: Pointer to vlan id to fill up if the GID entry has + * vlan id. It is optional. + * @smac: Pointer to smac to fill up for a GID entry. It is optional. + * + * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id + * (if gid entry has vlan) and source MAC, or returns error. + */ +int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, + u16 *vlan_id, u8 *smac) +{ + struct net_device *ndev; + + rcu_read_lock(); + ndev = rcu_dereference(attr->ndev); + if (!ndev) { + rcu_read_unlock(); + return -ENODEV; + } + if (smac) + ether_addr_copy(smac, ndev->dev_addr); + if (vlan_id) { + *vlan_id = 0xffff; + if (is_vlan_dev(ndev)) { + *vlan_id = vlan_dev_vlan_id(ndev); + } else { + /* If the netdev is upper device and if it's lower + * device is vlan device, consider vlan id of the + * the lower vlan device for this gid entry. + */ + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, vlan_id); + } + } + rcu_read_unlock(); + return 0; +} +EXPORT_SYMBOL(rdma_read_gid_l2_fields); static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) @@ -1392,7 +1478,6 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_GID_CHANGE) { work = kmalloc(sizeof *work, GFP_ATOMIC); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b9416a6fca36..da10e6ccb43c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -52,6 +52,7 @@ #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include "cm_msgs.h" +#include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); @@ -124,7 +125,8 @@ static struct ib_cm { struct rb_root remote_qp_table; struct rb_root remote_id_table; struct rb_root remote_sidr_table; - struct idr local_id_table; + struct xarray local_id_table; + u32 local_id_next; __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; @@ -219,7 +221,6 @@ struct cm_port { struct cm_device { struct list_head list; struct ib_device *ib_device; - struct device *device; u8 ack_delay; int going_down; struct cm_port *port[0]; @@ -598,35 +599,31 @@ static int cm_init_av_by_path(struct sa_path_rec *path, static int cm_alloc_id(struct cm_id_private *cm_id_priv) { - unsigned long flags; - int id; - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&cm.lock, flags); + int err; + u32 id; - id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&cm.lock, flags); - 
idr_preload_end(); + err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv, + xa_limit_32b, &cm.local_id_next, GFP_KERNEL); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return id < 0 ? id : 0; + return err; +} + +static u32 cm_local_id(__be32 local_id) +{ + return (__force u32) (local_id ^ cm.random_id_operand); } static void cm_free_id(__be32 local_id) { - spin_lock_irq(&cm.lock); - idr_remove(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); - spin_unlock_irq(&cm.lock); + xa_erase_irq(&cm.local_id_table, cm_local_id(local_id)); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); + cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -1988,11 +1985,12 @@ static int cm_req_handler(struct cm_work *work) grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); gid_attr = grh->sgid_attr; - if (gid_attr && gid_attr->ndev) { + if (gid_attr && + rdma_protocol_roce(work->port->cm_dev->ib_device, + work->port->port_num)) { work->path[0].rec_type = sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { - /* If no GID attribute or ndev is null, it is not RoCE. */ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], @@ -2824,9 +2822,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irq(&cm.lock); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, (__force int) - (timewait_info->work.local_id ^ - cm.random_id_operand)); + cm_id_priv = xa_load(&cm.local_id_table, + cm_local_id(timewait_info->work.local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -4276,18 +4273,6 @@ static struct kobj_type cm_counter_obj_type = { .default_attrs = cm_counter_default_attrs }; -static void cm_release_port_obj(struct kobject *obj) -{ - struct cm_port *cm_port; - - cm_port = container_of(obj, struct cm_port, port_obj); - kfree(cm_port); -} - -static struct kobj_type cm_port_obj_type = { - .release = cm_release_port_obj -}; - static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) @@ -4306,19 +4291,12 @@ static int cm_create_port_fs(struct cm_port *port) { int i, ret; - ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - &port->cm_dev->device->kobj, - "%d", port->port_num); - if (ret) { - kfree(port); - return ret; - } - for (i = 0; i < CM_COUNTER_GROUPS; i++) { - ret = kobject_init_and_add(&port->counter_group[i].obj, - &cm_counter_obj_type, - &port->port_obj, - "%s", counter_group_names[i]); + ret = ib_port_register_module_stat(port->cm_dev->ib_device, + port->port_num, + &port->counter_group[i].obj, + &cm_counter_obj_type, + counter_group_names[i]); if (ret) goto error; } @@ -4327,8 +4305,7 @@ static int cm_create_port_fs(struct cm_port *port) error: while (i--) - kobject_put(&port->counter_group[i].obj); - kobject_put(&port->port_obj); + ib_port_unregister_module_stat(&port->counter_group[i].obj); return ret; } @@ -4338,9 +4315,8 @@ static void cm_remove_port_fs(struct cm_port *port) int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) - kobject_put(&port->counter_group[i].obj); + ib_port_unregister_module_stat(&port->counter_group[i].obj); - kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *ib_device) @@ -4367,13 
+4343,6 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; - cm_dev->device = device_create(&cm_class, &ib_device->dev, - MKDEV(0, 0), NULL, - "%s", dev_name(&ib_device->dev)); - if (IS_ERR(cm_dev->device)) { - kfree(cm_dev); - return; - } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { @@ -4440,7 +4409,6 @@ error1: cm_remove_port_fs(port); } free: - device_unregister(cm_dev->device); kfree(cm_dev); } @@ -4494,7 +4462,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) cm_remove_port_fs(port); } - device_unregister(cm_dev->device); kfree(cm_dev); } @@ -4502,7 +4469,6 @@ static int __init ib_cm_init(void) { int ret; - memset(&cm, 0, sizeof cm); INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); @@ -4512,7 +4478,7 @@ static int __init ib_cm_init(void) cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; - idr_init(&cm.local_id_table); + xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); INIT_LIST_HEAD(&cm.timewait_list); @@ -4538,7 +4504,6 @@ error3: error2: class_unregister(&cm_class); error1: - idr_destroy(&cm.local_id_table); return ret; } @@ -4560,9 +4525,8 @@ static void __exit ib_cm_cleanup(void) } class_unregister(&cm_class); - idr_destroy(&cm.local_id_table); + WARN_ON(!xa_empty(&cm.local_id_table)); } module_init(ib_cm_init); module_exit(ib_cm_cleanup); - diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 476d4309576d..3d16d614aff6 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -98,7 +98,7 @@ struct cm_req_msg { u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) { @@ -423,7 +423,7 @@ enum cm_msg_response { u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) { @@ -461,7 +461,7 @@ struct cm_rej_msg { u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) { @@ -506,7 +506,7 @@ struct cm_rep_msg { u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) { @@ -614,7 +614,7 @@ struct cm_rtu_msg { u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; struct cm_dreq_msg { struct ib_mad_hdr hdr; @@ -626,7 +626,7 @@ struct cm_dreq_msg { u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) { @@ -647,7 +647,7 @@ struct cm_drep_msg { u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; struct cm_lap_msg { struct ib_mad_hdr hdr; @@ -675,7 +675,7 @@ struct cm_lap_msg { u8 offset63; u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) { @@ -784,7 +784,7 @@ struct cm_apr_msg { u8 info[IB_CM_APR_INFO_LENGTH]; u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; -} 
__attribute__ ((packed)); +} __packed; struct cm_sidr_req_msg { struct ib_mad_hdr hdr; @@ -795,7 +795,7 @@ struct cm_sidr_req_msg { __be64 service_id; u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __attribute__ ((packed)); +} __packed; struct cm_sidr_rep_msg { struct ib_mad_hdr hdr; @@ -811,7 +811,7 @@ struct cm_sidr_rep_msg { u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 68c997be2429..19f1730a4f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -39,7 +39,7 @@ #include <linux/mutex.h> #include <linux/random.h> #include <linux/igmp.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/module.h> @@ -191,10 +191,10 @@ static struct workqueue_struct *cma_wq; static unsigned int cma_pernet_id; struct cma_pernet { - struct idr tcp_ps; - struct idr udp_ps; - struct idr ipoib_ps; - struct idr ib_ps; + struct xarray tcp_ps; + struct xarray udp_ps; + struct xarray ipoib_ps; + struct xarray ib_ps; }; static struct cma_pernet *cma_pernet(struct net *net) @@ -202,7 +202,8 @@ static struct cma_pernet *cma_pernet(struct net *net) return net_generic(net, cma_pernet_id); } -static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps) +static +struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps) { struct cma_pernet *pernet = cma_pernet(net); @@ -247,25 +248,25 @@ struct class_port_info_context { static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list, int snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); + return xa_insert(xa, snum, bind_list, GFP_KERNEL); } static struct rdma_bind_list *cma_ps_find(struct net *net, enum rdma_ucm_port_space ps, int snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - return idr_find(idr, snum); + return xa_load(xa, snum); } static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps, int snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - idr_remove(idr, snum); + xa_erase(xa, snum); } enum { @@ -615,6 +616,9 @@ cma_validate_port(struct ib_device *device, u8 port, int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; + if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) + return ERR_PTR(-ENODEV); + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ERR_PTR(-ENODEV); @@ -1173,18 +1177,31 @@ static inline bool cma_any_addr(const struct sockaddr *addr) return cma_zero_addr(addr) || cma_loopback_addr(addr); } -static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) +static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: - return ((struct sockaddr_in *) src)->sin_addr.s_addr != - ((struct sockaddr_in *) dst)->sin_addr.s_addr; - case AF_INET6: - return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); + return ((struct sockaddr_in *)src)->sin_addr.s_addr != + ((struct 
sockaddr_in *)dst)->sin_addr.s_addr; + case AF_INET6: { + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst; + bool link_local; + + if (ipv6_addr_cmp(&src_addr6->sin6_addr, + &dst_addr6->sin6_addr)) + return 1; + link_local = ipv6_addr_type(&dst_addr6->sin6_addr) & + IPV6_ADDR_LINKLOCAL; + /* Link local must match their scope_ids */ + return link_local ? (src_addr6->sin6_scope_id != + dst_addr6->sin6_scope_id) : + 0; + } + default: return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, &((struct sockaddr_ib *) dst)->sib_addr); @@ -1469,6 +1486,7 @@ static struct net_device * roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) { const struct ib_gid_attr *sgid_attr = NULL; + struct net_device *ndev; if (ib_event->event == IB_CM_REQ_RECEIVED) sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; @@ -1477,8 +1495,15 @@ roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) if (!sgid_attr) return NULL; - dev_hold(sgid_attr->ndev); - return sgid_attr->ndev; + + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr); + if (IS_ERR(ndev)) + ndev = NULL; + else + dev_hold(ndev); + rcu_read_unlock(); + return ndev; } static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, @@ -3247,7 +3272,7 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, goto err; bind_list->ps = ps; - bind_list->port = (unsigned short)ret; + bind_list->port = snum; cma_bind_port(bind_list, id_priv); return 0; err: @@ -4655,10 +4680,10 @@ static int cma_init_net(struct net *net) { struct cma_pernet *pernet = cma_pernet(net); - idr_init(&pernet->tcp_ps); - idr_init(&pernet->udp_ps); - idr_init(&pernet->ipoib_ps); - idr_init(&pernet->ib_ps); + xa_init(&pernet->tcp_ps); + xa_init(&pernet->udp_ps); + xa_init(&pernet->ipoib_ps); + xa_init(&pernet->ib_ps); return 0; } @@ -4667,10 +4692,10 @@ static void cma_exit_net(struct net *net) { struct cma_pernet *pernet = cma_pernet(net); - idr_destroy(&pernet->tcp_ps); - idr_destroy(&pernet->udp_ps); - idr_destroy(&pernet->ipoib_ps); - idr_destroy(&pernet->ib_ps); + WARN_ON(!xa_empty(&pernet->tcp_ps)); + WARN_ON(!xa_empty(&pernet->udp_ps)); + WARN_ON(!xa_empty(&pernet->ipoib_ps)); + WARN_ON(!xa_empty(&pernet->ib_ps)); } static struct pernet_operations cma_pernet_operations = { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 08c690249594..ff40a450b5d2 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -55,6 +55,7 @@ struct pkey_index_qp_list { }; extern const struct attribute_group ib_dev_attr_group; +extern bool ib_devices_shared_netns; int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); @@ -279,7 +280,8 @@ static inline void ib_mad_agent_security_change(void) } #endif -struct ib_device *ib_device_get_by_index(u32 ifindex); +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); + /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); @@ -302,6 +304,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->device = dev; qp->pd = pd; qp->uobject = uobj; + qp->real_qp = qp; /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, @@ -336,4 +339,17 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, const struct ib_gid_attr *attr); struct net_device *rdma_read_gid_attr_ndev_rcu(const 
struct ib_gid_attr *attr); + +void ib_free_port_attrs(struct ib_core_device *coredev); +int ib_setup_port_attrs(struct ib_core_device *coredev); + +int rdma_compatdev_set(u8 enable); + +int ib_port_register_module_stat(struct ib_device *device, u8 port_num, + struct kobject *kobj, struct kobj_type *ktype, + const char *name); +void ib_port_unregister_module_stat(struct kobject *kobj); + +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index d61e5e1427c2..a4c81992267c 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -128,15 +128,17 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) * @comp_vector: HCA completion vectors for this CQ * @poll_ctx: context to poll the CQ from. * @caller: module owner name. + * @udata: Valid user data or NULL for kernel object * * This is the proper interface to allocate a CQ for in-kernel users. A * CQ allocated with this interface will automatically be polled from the * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id * to use this CQ abstraction. */ -struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, const char *caller) +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata) { struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe, @@ -145,7 +147,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL); + cq = dev->ops.create_cq(dev, &cq_attr, NULL); if (IS_ERR(cq)) return cq; @@ -193,16 +195,17 @@ out_free_wc: kfree(cq->wc); rdma_restrack_del(&cq->res); out_destroy_cq: - cq->device->ops.destroy_cq(cq); + cq->device->ops.destroy_cq(cq, udata); return ERR_PTR(ret); } -EXPORT_SYMBOL(__ib_alloc_cq); +EXPORT_SYMBOL(__ib_alloc_cq_user); /** * ib_free_cq - free a completion queue * @cq: completion queue to free. + * @udata: User data or NULL for kernel object */ -void ib_free_cq(struct ib_cq *cq) +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { int ret; @@ -225,7 +228,7 @@ void ib_free_cq(struct ib_cq *cq) kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->ops.destroy_cq(cq); + ret = cq->device->ops.destroy_cq(cq, udata); WARN_ON_ONCE(ret); } -EXPORT_SYMBOL(ib_free_cq); +EXPORT_SYMBOL(ib_free_cq_user); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7421ec4883fb..78dc07c6ac4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,8 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/security.h> #include <linux/notifier.h> #include <linux/hashtable.h> @@ -101,6 +103,54 @@ static DECLARE_RWSEM(clients_rwsem); * be registered. */ #define CLIENT_DATA_REGISTERED XA_MARK_1 + +/** + * struct rdma_dev_net - rdma net namespace metadata for a net + * @net: Pointer to owner net namespace + * @id: xarray id to identify the net namespace. + */ +struct rdma_dev_net { + possible_net_t net; + u32 id; +}; + +static unsigned int rdma_dev_net_id; + +/* + * A list of net namespaces is maintained in an xarray. 
This is necessary + * because we can't get the locking right using the existing net ns list. We + * would require a init_net callback after the list is updated. + */ +static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); +/* + * rwsem to protect accessing the rdma_nets xarray entries. + */ +static DECLARE_RWSEM(rdma_nets_rwsem); + +bool ib_devices_shared_netns = true; +module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); +MODULE_PARM_DESC(netns_mode, + "Share device among net namespaces; default=1 (shared)"); +/** + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. + * @device: Pointer to rdma device which needs to be checked + * @net: Pointer to net namesapce for which access to be checked + * + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. When + * rdma device is in shared mode, it ignores the + * net namespace. When rdma device is exclusive + * to a net namespace, rdma device net namespace is + * checked against the specified one. + */ +bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) +{ + return (ib_devices_shared_netns || + net_eq(read_pnet(&dev->coredev.rdma_net), net)); +} +EXPORT_SYMBOL(rdma_dev_access_netns); + /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. So we need our own iterator to see all values stored in @@ -147,10 +197,73 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, static void ib_policy_change_task(struct work_struct *work); static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); +static void __ibdev_printk(const char *level, const struct ib_device *ibdev, + struct va_format *vaf) +{ + if (ibdev && ibdev->dev.parent) + dev_printk_emit(level[1] - '0', + ibdev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + vaf); + else if (ibdev) + printk("%s%s: %pV", + level, dev_name(&ibdev->dev), vaf); + else + printk("%s(NULL ib_device): %pV", level, vaf); +} + +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + __ibdev_printk(level, ibdev, &vaf); + + va_end(args); +} +EXPORT_SYMBOL(ibdev_printk); + +#define define_ibdev_printk_level(func, level) \ +void func(const struct ib_device *ibdev, const char *fmt, ...) 
\ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __ibdev_printk(level, ibdev, &vaf); \ + \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func); + +define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); +define_ibdev_printk_level(ibdev_alert, KERN_ALERT); +define_ibdev_printk_level(ibdev_crit, KERN_CRIT); +define_ibdev_printk_level(ibdev_err, KERN_ERR); +define_ibdev_printk_level(ibdev_warn, KERN_WARNING); +define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); +define_ibdev_printk_level(ibdev_info, KERN_INFO); + static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net); + /* Pointer to the RCU head at the start of the ib_port_data array */ struct ib_port_data_rcu { struct rcu_head rcu_head; @@ -200,16 +313,22 @@ static int ib_device_check_mandatory(struct ib_device *device) * Caller must perform ib_device_put() to return the device reference count * when ib_device_get_by_index() returns valid device pointer. */ -struct ib_device *ib_device_get_by_index(u32 index) +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) { struct ib_device *device; down_read(&devices_rwsem); device = xa_load(&devices, index); if (device) { + if (!rdma_dev_access_netns(device, net)) { + device = NULL; + goto out; + } + if (!ib_device_try_get(device)) device = NULL; } +out: up_read(&devices_rwsem); return device; } @@ -268,6 +387,26 @@ struct ib_device *ib_device_get_by_name(const char *name, } EXPORT_SYMBOL(ib_device_get_by_name); +static int rename_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + int ret = 0; + + mutex_lock(&device->compat_devs_mutex); + xa_for_each (&device->compat_devs, index, cdev) { + ret = device_rename(&cdev->dev, dev_name(&device->dev)); + if (ret) { + dev_warn(&cdev->dev, + "Fail to rename compatdev to new name %s\n", + dev_name(&device->dev)); + break; + } + } + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + int ib_device_rename(struct ib_device *ibdev, const char *name) { int ret; @@ -287,6 +426,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) if (ret) goto out; strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + ret = rename_compat_devs(ibdev); out: up_write(&devices_rwsem); return ret; @@ -336,6 +476,7 @@ static void ib_device_release(struct device *device) WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); + xa_destroy(&dev->compat_devs); xa_destroy(&dev->client_data); if (dev->port_data) kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, @@ -357,12 +498,42 @@ static int ib_device_uevent(struct device *device, return 0; } +static const void *net_namespace(struct device *d) +{ + struct ib_core_device *coredev = + container_of(d, struct ib_core_device, dev); + + return read_pnet(&coredev->rdma_net); +} + static struct class ib_class = { .name = "infiniband", .dev_release = ib_device_release, .dev_uevent = ib_device_uevent, + .ns_type = &net_ns_type_operations, + .namespace = net_namespace, }; +static void rdma_init_coredev(struct ib_core_device *coredev, + struct ib_device *dev, struct net *net) +{ + /* This BUILD_BUG_ON is intended to catch layout change + * of union of ib_core_device and device. + * dev must be the first element as ib_core and providers + * driver uses it. 
Adding anything in ib_core_device before + * device will break this assumption. + */ + BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != + offsetof(struct ib_device, dev)); + + coredev->dev.class = &ib_class; + coredev->dev.groups = dev->groups; + device_initialize(&coredev->dev); + coredev->owner = dev; + INIT_LIST_HEAD(&coredev->port_list); + write_pnet(&coredev->rdma_net, net); +} + /** * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -389,10 +560,8 @@ struct ib_device *_ib_alloc_device(size_t size) return NULL; } - device->dev.class = &ib_class; device->groups[0] = &ib_dev_attr_group; - device->dev.groups = device->groups; - device_initialize(&device->dev); + rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); @@ -403,7 +572,8 @@ struct ib_device *_ib_alloc_device(size_t size) */ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); init_rwsem(&device->client_data_rwsem); - INIT_LIST_HEAD(&device->port_list); + xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); + mutex_init(&device->compat_devs_mutex); init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); @@ -436,6 +606,7 @@ void ib_dealloc_device(struct ib_device *device) /* Expedite releasing netdev references */ free_netdevs(device); + WARN_ON(!xa_empty(&device->compat_devs)); WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); @@ -644,6 +815,283 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_OK; } +static void compatdev_release(struct device *dev) +{ + struct ib_core_device *cdev = + container_of(dev, struct ib_core_device, dev); + + kfree(cdev); +} + +static int add_one_compat_dev(struct ib_device *device, + struct rdma_dev_net *rnet) +{ + struct ib_core_device *cdev; + int ret; + + lockdep_assert_held(&rdma_nets_rwsem); + if (!ib_devices_shared_netns) + return 0; + + /* + * Create and add compat device in all namespaces other than where it + * is currently bound to. + */ + if (net_eq(read_pnet(&rnet->net), + read_pnet(&device->coredev.rdma_net))) + return 0; + + /* + * The first of init_net() or ib_register_device() to take the + * compat_devs_mutex wins and gets to add the device. Others will wait + * for completion here. 
+ */ + mutex_lock(&device->compat_devs_mutex); + cdev = xa_load(&device->compat_devs, rnet->id); + if (cdev) { + ret = 0; + goto done; + } + ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); + if (ret) + goto done; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) { + ret = -ENOMEM; + goto cdev_err; + } + + cdev->dev.parent = device->dev.parent; + rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); + cdev->dev.release = compatdev_release; + dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + + ret = device_add(&cdev->dev); + if (ret) + goto add_err; + ret = ib_setup_port_attrs(cdev); + if (ret) + goto port_err; + + ret = xa_err(xa_store(&device->compat_devs, rnet->id, + cdev, GFP_KERNEL)); + if (ret) + goto insert_err; + + mutex_unlock(&device->compat_devs_mutex); + return 0; + +insert_err: + ib_free_port_attrs(cdev); +port_err: + device_del(&cdev->dev); +add_err: + put_device(&cdev->dev); +cdev_err: + xa_release(&device->compat_devs, rnet->id); +done: + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + +static void remove_one_compat_dev(struct ib_device *device, u32 id) +{ + struct ib_core_device *cdev; + + mutex_lock(&device->compat_devs_mutex); + cdev = xa_erase(&device->compat_devs, id); + mutex_unlock(&device->compat_devs_mutex); + if (cdev) { + ib_free_port_attrs(cdev); + device_del(&cdev->dev); + put_device(&cdev->dev); + } +} + +static void remove_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + + xa_for_each (&device->compat_devs, index, cdev) + remove_one_compat_dev(device, index); +} + +static int add_compat_devs(struct ib_device *device) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + lockdep_assert_held(&devices_rwsem); + + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, index, rnet) { + ret = add_one_compat_dev(device, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + return ret; +} + +static void remove_all_compat_devs(void) +{ + struct ib_compat_device *cdev; + struct ib_device *dev; + unsigned long index; + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + unsigned long c_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&dev->compat_devs, c_index, cdev) + remove_one_compat_dev(dev, c_index); + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); +} + +static int add_all_compat_devs(void) +{ + struct rdma_dev_net *rnet; + struct ib_device *dev; + unsigned long index; + int ret = 0; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + unsigned long net_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, net_index, rnet) { + ret = add_one_compat_dev(dev, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); + if (ret) + remove_all_compat_devs(); + return ret; +} + +int rdma_compatdev_set(u8 enable) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + down_write(&rdma_nets_rwsem); + if (ib_devices_shared_netns == enable) { + up_write(&rdma_nets_rwsem); + return 0; + } + + /* enable/disable of compat devices is not supported + * when more than default init_net exists. 
+ */ + xa_for_each (&rdma_nets, index, rnet) { + ret++; + break; + } + if (!ret) + ib_devices_shared_netns = enable; + up_write(&rdma_nets_rwsem); + if (ret) + return -EBUSY; + + if (enable) + ret = add_all_compat_devs(); + else + remove_all_compat_devs(); + return ret; +} + +static void rdma_dev_exit_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct ib_device *dev; + unsigned long index; + int ret; + + down_write(&rdma_nets_rwsem); + /* + * Prevent the ID from being re-used and hide the id from xa_for_each. + */ + ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); + WARN_ON(ret); + up_write(&rdma_nets_rwsem); + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + get_device(&dev->dev); + /* + * Release the devices_rwsem so that pontentially blocking + * device_del, doesn't hold the devices_rwsem for too long. + */ + up_read(&devices_rwsem); + + remove_one_compat_dev(dev, rnet->id); + + /* + * If the real device is in the NS then move it back to init. + */ + rdma_dev_change_netns(dev, net, &init_net); + + put_device(&dev->dev); + down_read(&devices_rwsem); + } + up_read(&devices_rwsem); + + xa_erase(&rdma_nets, rnet->id); +} + +static __net_init int rdma_dev_init_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + unsigned long index; + struct ib_device *dev; + int ret; + + /* No need to create any compat devices in default init_net. */ + if (net_eq(net, &init_net)) + return 0; + + write_pnet(&rnet->net, net); + + ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + /* Hold nets_rwsem so that netlink command cannot change + * system configuration for device sharing mode. + */ + down_read(&rdma_nets_rwsem); + ret = add_one_compat_dev(dev, rnet); + up_read(&rdma_nets_rwsem); + if (ret) + break; + } + up_read(&devices_rwsem); + + if (ret) + rdma_dev_exit_net(net); + + return ret; +} + /* * Assign the unique string device name and the unique device index. This is * undone by ib_dealloc_device. @@ -711,6 +1159,9 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } + /* Setup default max segment size for all IB devices */ + dma_set_max_seg_size(device->dma_device, SZ_2G); + } /* @@ -765,8 +1216,12 @@ static void disable_device(struct ib_device *device) ib_device_put(device); wait_for_completion(&device->unreg_completion); - /* Expedite removing unregistered pointers from the hash table */ - free_netdevs(device); + /* + * compat devices must be removed after device refcount drops to zero. + * Otherwise init_net() may add more compatdevs after removing compat + * devices and before device is disabled. + */ + remove_compat_devs(device); } /* @@ -807,7 +1262,8 @@ static int enable_device_and_get(struct ib_device *device) break; } up_read(&clients_rwsem); - + if (!ret) + ret = add_compat_devs(device); out: up_read(&devices_rwsem); return ret; @@ -847,6 +1303,11 @@ int ib_register_device(struct ib_device *device, const char *name) ib_device_register_rdmacg(device); + /* + * Ensure that ADD uevent is not fired because it + * is too early amd device is not initialized yet. 
+ */ + dev_set_uevent_suppress(&device->dev, true); ret = device_add(&device->dev); if (ret) goto cg_cleanup; @@ -859,6 +1320,9 @@ int ib_register_device(struct ib_device *device, const char *name) } ret = enable_device_and_get(device); + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -887,6 +1351,7 @@ int ib_register_device(struct ib_device *device, const char *name) dev_cleanup: device_del(&device->dev); cg_cleanup: + dev_set_uevent_suppress(&device->dev, false); ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); return ret; @@ -908,6 +1373,10 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + + /* Expedite removing unregistered pointers from the hash table */ + free_netdevs(ib_dev); + ib_device_unregister_sysfs(ib_dev); device_del(&ib_dev->dev); ib_device_unregister_rdmacg(ib_dev); @@ -1038,6 +1507,126 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) } EXPORT_SYMBOL(ib_unregister_device_queued); +/* + * The caller must pass in a device that has the kref held and the refcount + * released. If the device is in cur_net and still registered then it is moved + * into net. + */ +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net) +{ + int ret2 = -EINVAL; + int ret; + + mutex_lock(&device->unregistration_lock); + + /* + * If a device not under ib_device_get() or if the unregistration_lock + * is not held, the namespace can be changed, or it can be unregistered. + * Check again under the lock. + */ + if (refcount_read(&device->refcount) == 0 || + !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { + ret = -ENODEV; + goto out; + } + + kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); + disable_device(device); + + /* + * At this point no one can be using the device, so it is safe to + * change the namespace. + */ + write_pnet(&device->coredev.rdma_net, net); + + down_read(&devices_rwsem); + /* + * Currently rdma devices are system wide unique. So the device name + * is guaranteed free in the new namespace. Publish the new namespace + * at the sysfs level. + */ + ret = device_rename(&device->dev, dev_name(&device->dev)); + up_read(&devices_rwsem); + if (ret) { + dev_warn(&device->dev, + "%s: Couldn't rename device after namespace change\n", + __func__); + /* Try and put things back and re-enable the device */ + write_pnet(&device->coredev.rdma_net, cur_net); + } + + ret2 = enable_device_and_get(device); + if (ret2) { + /* + * This shouldn't really happen, but if it does, let the user + * retry at later point. So don't disable the device. + */ + dev_warn(&device->dev, + "%s: Couldn't re-enable device after namespace change\n", + __func__); + } + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + + ib_device_put(device); +out: + mutex_unlock(&device->unregistration_lock); + if (ret) + return ret; + return ret2; +} + +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd) +{ + struct net *net; + int ret; + + net = get_net_ns_by_fd(ns_fd); + if (IS_ERR(net)) { + ret = PTR_ERR(net); + goto net_err; + } + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto ns_err; + } + + /* + * Currently supported only for those providers which support + * disassociation and don't do port specific sysfs init. 
Once a + * port_cleanup infrastructure is implemented, this limitation will be + * removed. + */ + if (!dev->ops.disassociate_ucontext || dev->ops.init_port || + ib_devices_shared_netns) { + ret = -EOPNOTSUPP; + goto ns_err; + } + + get_device(&dev->dev); + ib_device_put(dev); + ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); + put_device(&dev->dev); + + put_net(net); + return ret; + +ns_err: + put_net(net); +net_err: + ib_device_put(dev); + return ret; +} + +static struct pernet_operations rdma_dev_net_ops = { + .init = rdma_dev_init_net, + .exit = rdma_dev_exit_net, + .id = &rdma_dev_net_id, + .size = sizeof(struct rdma_dev_net), +}; + static int assign_client_id(struct ib_client *client) { int ret; @@ -1515,6 +2104,9 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) + continue; + ret = nldev_cb(dev, skb, cb, idx); if (ret) break; @@ -1787,6 +2379,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); + SET_DEVICE_OP(dev_ops, iw_accept); + SET_DEVICE_OP(dev_ops, iw_add_ref); + SET_DEVICE_OP(dev_ops, iw_connect); + SET_DEVICE_OP(dev_ops, iw_create_listen); + SET_DEVICE_OP(dev_ops, iw_destroy_listen); + SET_DEVICE_OP(dev_ops, iw_get_qp); + SET_DEVICE_OP(dev_ops, iw_reject); + SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); @@ -1823,7 +2423,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_DEVICE_OP(dev_ops, unmap_fmr); + SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_pd); + SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); } EXPORT_SYMBOL(ib_set_device_ops); @@ -1903,12 +2505,20 @@ static int __init ib_core_init(void) goto err_sa; } + ret = register_pernet_device(&rdma_dev_net_ops); + if (ret) { + pr_warn("Couldn't init compat dev. ret %d\n", ret); + goto err_compat; + } + nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); roce_gid_mgmt_init(); return 0; +err_compat: + unregister_lsm_notifier(&ibdev_lsm_nb); err_sa: ib_sa_cleanup(); err_mad: @@ -1933,6 +2543,7 @@ static void __exit ib_core_cleanup(void) roce_gid_mgmt_cleanup(); nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + unregister_pernet_device(&rdma_dev_net_ops); unregister_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); ib_mad_cleanup(); @@ -1950,5 +2561,8 @@ static void __exit ib_core_cleanup(void) MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); -subsys_initcall(ib_core_init); +/* ib core relies on netdev stack to first register net_ns_type_operations + * ns kobject type before ib_core initialization. 
+ */ +fs_initcall(ib_core_init); module_exit(ib_core_cleanup); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 732637c913d9..72141c5b7c95 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -394,7 +394,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ - cm_id->device->iwcm->destroy_listen(cm_id); + cm_id->device->ops.iw_destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: @@ -417,7 +417,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_id->device->iwcm->reject(cm_id, NULL, 0); + cm_id->device->ops.iw_reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: @@ -427,7 +427,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) break; } if (cm_id_priv->qp) { - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -504,7 +504,7 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { const char *devname = dev_name(&cm_id->device->dev); - const char *ifname = cm_id->device->iwcm->ifname; + const char *ifname = cm_id->device->iw_ifname; struct iwpm_dev_data pm_reg_msg = {}; struct iwpm_sa_data pm_msg; int status; @@ -526,7 +526,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) cm_id->mapped = true; pm_msg.loc_addr = cm_id->local_addr; pm_msg.rem_addr = cm_id->remote_addr; - pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ? + pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? 
IWPM_FLAGS_NO_PORT_MAP : 0; if (active) status = iwpm_add_and_query_mapping(&pm_msg, @@ -577,7 +577,8 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, false); if (!ret) - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + ret = cm_id->device->ops.iw_create_listen(cm_id, + backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -617,7 +618,7 @@ int iw_cm_reject(struct iw_cm_id *cm_id, cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->reject(cm_id, private_data, + ret = cm_id->device->ops.iw_reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); @@ -653,25 +654,25 @@ int iw_cm_accept(struct iw_cm_id *cm_id, return -EINVAL; } /* Get the ib_qp given the QPN */ - qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } - cm_id->device->iwcm->add_ref(qp); + cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->accept(cm_id, iw_param); + ret = cm_id->device->ops.iw_accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); + cm_id->device->ops.iw_rem_ref(qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -712,25 +713,25 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) } /* Get the ib_qp given the QPN */ - qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { ret = -EINVAL; goto err; } - cm_id->device->iwcm->add_ref(qp); + cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, true); if (!ret) - ret = cm_id->device->iwcm->connect(cm_id, iw_param); + ret = cm_id->device->ops.iw_connect(cm_id, iw_param); if (!ret) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); + cm_id->device->ops.iw_rem_ref(qp); cm_id_priv->qp = NULL; } cm_id_priv->state = IW_CM_STATE_IDLE; @@ -895,7 +896,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } @@ -946,7 +947,7 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } switch (cm_id_priv->state) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e742a6a2c138..cc99479b2c09 100644 --- a/drivers/infiniband/core/mad.c +++ 
b/drivers/infiniband/core/mad.c @@ -3,7 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,10 +38,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> -#include <linux/idr.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> +#include <linux/xarray.h> #include <rdma/ib_cache.h> #include "mad_priv.h" @@ -51,6 +51,32 @@ #include "opa_smi.h" #include "agent.h" +#define CREATE_TRACE_POINTS +#include <trace/events/ib_mad.h> + +#ifdef CONFIG_TRACEPOINTS +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) +{ + u16 pkey; + struct ib_device *dev = qp_info->port_priv->device; + u8 pnum = qp_info->port_priv->port_num; + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + + rdma_query_ah(wr->ah, &attr); + + /* These are common */ + entry->sl = attr.sl; + ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); + entry->pkey = pkey; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +} +#endif + static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -59,12 +85,9 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -/* - * The mlx4 driver uses the top byte to distinguish which virtual function - * generated the MAD, so we must avoid using it. - */ -#define AGENT_ID_LIMIT (1 << 24) -static DEFINE_IDR(ib_mad_clients); +/* Client ID 0 is used for snoop-only clients */ +static DEFINE_XARRAY_ALLOC1(ib_mad_clients); +static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; /* Port list lock */ @@ -389,18 +412,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - idr_preload(GFP_KERNEL); - idr_lock(&ib_mad_clients); - ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, - AGENT_ID_LIMIT, GFP_ATOMIC); - idr_unlock(&ib_mad_clients); - idr_preload_end(); - + /* + * The mlx4 driver uses the top byte to distinguish which virtual + * function generated the MAD, so we must avoid using it. 
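The comment above carries the one non-obvious constraint in this hunk: the agent ID ends up in the high 32 bits of the MAD transaction ID, and mlx4 reserves the top byte of that value for the virtual function number, so the xa_alloc_cyclic() call that follows limits IDs to 24 bits. A standalone sketch of the resulting hi_tid layout; the 24-bit/8-bit split comes from that comment, while the helper and constant names here are illustrative only:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative split of a 32-bit hi_tid: the allocator may only hand out
     * values below 1 << 24 so the top byte stays free for the VF tag. */
    #define HI_TID_CLIENT_BITS 24
    #define HI_TID_CLIENT_MAX  ((1u << HI_TID_CLIENT_BITS) - 1)

    static uint32_t make_hi_tid(uint8_t vf, uint32_t client_id)
    {
        assert(client_id <= HI_TID_CLIENT_MAX);
        return ((uint32_t)vf << HI_TID_CLIENT_BITS) | client_id;
    }

    int main(void)
    {
        uint32_t tid = make_hi_tid(3, 0x00ABCD);

        printf("vf=%u client=0x%06x\n",
               (unsigned)(tid >> HI_TID_CLIENT_BITS),
               (unsigned)(tid & HI_TID_CLIENT_MAX));
        return 0;
    }

This is also why the old AGENT_ID_LIMIT define disappears: the bound now lives directly in the XA_LIMIT(0, (1 << 24) - 1) argument, with ib_mad_client_next keeping the cyclic cursor.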
+ */ + ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid, + mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1), + &ib_mad_client_next, GFP_KERNEL); if (ret2 < 0) { ret = ERR_PTR(ret2); goto error5; } - mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) @@ -445,12 +467,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } spin_unlock_irq(&port_priv->reg_lock); + trace_ib_mad_create_agent(mad_agent_priv); return &mad_agent_priv->agent; error6: spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); error5: ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: @@ -602,6 +623,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) struct ib_mad_port_private *port_priv; /* Note that we could still be handling received MADs */ + trace_ib_mad_unregister_agent(mad_agent_priv); /* * Canceling all sends results in dropping received response @@ -614,9 +636,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -821,6 +841,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; + trace_ib_mad_handle_out_opa_smi(opa_smp); + if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && @@ -846,6 +868,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) goto out; } else { + trace_ib_mad_handle_out_ib_smi(smp); + if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == @@ -1223,6 +1247,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + trace_ib_mad_ib_send_mad(mad_send_wr, qp_info); ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, NULL); list = &qp_info->send_queue.list; @@ -1756,7 +1781,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); - mad_agent = idr_find(&ib_mad_clients, hi_tid); + mad_agent = xa_load(&ib_mad_clients, hi_tid); if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); @@ -2077,6 +2102,8 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv enum smi_forward_action retsmi; struct ib_smp *smp = (struct ib_smp *)recv->mad; + trace_ib_mad_handle_ib_smi(smp); + if (smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, @@ -2162,6 +2189,8 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, enum smi_forward_action retsmi; struct opa_smp *smp = (struct opa_smp *)recv->mad; + trace_ib_mad_handle_opa_smi(smp); + if (opa_smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, @@ -2286,6 +2315,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; + trace_ib_mad_recv_done_handler(qp_info, wc, + (struct ib_mad_hdr *)recv->mad); + mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) @@ -2332,6 +2364,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { + trace_ib_mad_recv_done_agent(mad_agent); ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv @@ -2496,6 +2529,9 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; + trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); + trace_ib_mad_send_done_handler(mad_send_wr, wc); + retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, @@ -2527,6 +2563,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { + trace_ib_mad_send_done_resend(queued_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, NULL); if (ret) { @@ -2574,6 +2611,7 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (mad_send_wr->retry) { /* Repost send */ mad_send_wr->retry = 0; + trace_ib_mad_error_handler(mad_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, NULL); if (!ret) @@ -3356,9 +3394,6 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); - /* Client ID 0 is used for snoop-only clients */ - idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); - if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 216509036aa8..956b3a7dfed7 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -73,14 +73,14 @@ struct ib_mad_private_header { struct ib_mad_recv_wc recv_wc; struct ib_wc 
wc; u64 mapping; -} __attribute__ ((packed)); +} __packed; struct ib_mad_private { struct ib_mad_private_header header; size_t mad_size; struct ib_grh grh; u8 mad[0]; -} __attribute__ ((packed)); +} __packed; struct ib_rmpp_segment { struct list_head list; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index d50ff70bb24b..cd338ddc4a39 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -804,7 +804,6 @@ static void mcast_event_handler(struct ib_event_handler *handler, switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); break; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 85324012bf07..98eadd3089ce 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -116,6 +116,10 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -198,6 +202,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { char fw[IB_FW_VERSION_NAME_MAX]; + int ret = 0; + u8 port; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; @@ -226,7 +232,25 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) return -EMSGSIZE; - return 0; + + /* + * Link type is determined on first port and mlx4 device + * which can potentially have two different link type for the same + * IB device is considered as better to be avoided in the future, + */ + port = rdma_start_port(device); + if (rdma_cap_opa_mad(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa"); + else if (rdma_protocol_ib(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib"); + else if (rdma_protocol_iwarp(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw"); + else if (rdma_protocol_roce(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce"); + else if (rdma_protocol_usnic(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, + "usnic"); + return ret; } static int fill_port_info(struct sk_buff *msg, @@ -615,7 +639,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -659,7 +683,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -669,9 +693,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); err = 
ib_device_rename(device, name); + goto done; } + if (tb[RDMA_NLDEV_NET_NS_FD]) { + u32 ns_fd; + + ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); + err = ib_device_set_netns_put(skb, device, ns_fd); + goto put_done; + } + +done: ib_device_put(device); +put_done: return err; } @@ -707,7 +742,7 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* * There is no need to take lock, because - * we are relying on ib_core's lists_rwsem + * we are relying on ib_core's locking. */ return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); } @@ -730,7 +765,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -784,7 +819,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(ifindex); + device = ib_device_get_by_index(sock_net(skb->sk), ifindex); if (!device) return -EINVAL; @@ -839,7 +874,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -887,7 +922,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device, nlmsg_cancel(skb, nlh); goto out; } - nlmsg_end(skb, nlh); idx++; @@ -988,7 +1022,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -1085,7 +1119,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -1300,7 +1334,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -1313,6 +1347,55 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int nldev_get_sys_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct nlmsghdr *nlh; + int err; + + err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); + if (err) + return err; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_SYS_GET), + 0, 0); + + err = nla_put_u8(skb, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, + (u8)ib_devices_shared_netns); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + + nlmsg_end(skb, nlh); + return skb->len; +} + +static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + u8 enable; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) + return -EINVAL; + + enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); + 
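The new RDMA_NLDEV_CMD_SYS_SET handler treats the netns-mode attribute as a strict boolean: the u8 read here must be 0 (shared) or 1 (exclusive) before it reaches rdma_compatdev_set(), and anything else is rejected with -EINVAL rather than silently truncated, as the check that follows shows. A toy userspace-style sketch of the same strict validation over a hand-rolled TLV array; the attribute number and struct below are made up for illustration and are not the uapi encoding:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Toy TLV attribute, loosely modelled on a netlink nlattr (type, length,
     * payload). Only the "reject anything but 0/1" rule mirrors the patch. */
    struct tlv { uint16_t type; uint16_t len; const void *data; };

    #define ATTR_NETNS_MODE 1

    static int set_netns_mode(const struct tlv *attrs, size_t n, uint8_t *mode)
    {
        for (size_t i = 0; i < n; i++) {
            if (attrs[i].type != ATTR_NETNS_MODE || attrs[i].len != sizeof(uint8_t))
                continue;
            memcpy(mode, attrs[i].data, sizeof(*mode));
            return *mode > 1 ? -1 : 0;   /* only 0 (shared) and 1 (exclusive) */
        }
        return -1;                       /* attribute missing: -EINVAL in the kernel */
    }

    int main(void)
    {
        uint8_t exclusive = 1, bogus = 7, mode;
        struct tlv ok[]  = { { ATTR_NETNS_MODE, sizeof(exclusive), &exclusive } };
        struct tlv bad[] = { { ATTR_NETNS_MODE, sizeof(bogus), &bogus } };

        printf("exclusive: %d\n", set_netns_mode(ok, 1, &mode));
        printf("bogus:     %d\n", set_netns_mode(bad, 1, &mode));
        return 0;
    }

The command itself is registered with RDMA_NL_ADMIN_PERM further down, so only privileged callers get this far; in practice the knob is driven through iproute2's rdma tool rather than hand-written netlink.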
/* Only 0 and 1 are supported */ + if (enable > 1) + return -EINVAL; + + err = rdma_compatdev_set(enable); + return err; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -1358,6 +1441,13 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_res_get_pd_doit, .dump = nldev_res_get_pd_dumpit, }, + [RDMA_NLDEV_CMD_SYS_GET] = { + .dump = nldev_get_sys_get_dumpit, + }, + [RDMA_NLDEV_CMD_SYS_SET] = { + .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 778375ff664e..ccf4d069c25c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -125,9 +125,10 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, * and consumes the kref on the uobj. */ static int uverbs_destroy_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason reason) + enum rdma_remove_reason reason, + struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; unsigned long flags; int ret; @@ -135,7 +136,8 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { - ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason); + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, + attrs); if (ret) { if (ib_is_destroy_retryable(ret, reason, uobj)) return ret; @@ -196,9 +198,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, * version requires the caller to have already obtained an * LOOKUP_DESTROY uobject kref. */ -int uobj_destroy(struct ib_uobject *uobj) +int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; int ret; down_read(&ufile->hw_destroy_rwsem); @@ -207,7 +209,7 @@ int uobj_destroy(struct ib_uobject *uobj) if (ret) goto out_unlock; - ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY); + ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs); if (ret) { atomic_set(&uobj->usecnt, 0); goto out_unlock; @@ -224,18 +226,17 @@ out_unlock: * uverbs_put_destroy. */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, - const struct uverbs_attr_bundle *attrs) + u32 id, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id, - UVERBS_LOOKUP_DESTROY); + UVERBS_LOOKUP_DESTROY, attrs); if (IS_ERR(uobj)) return uobj; - ret = uobj_destroy(uobj); + ret = uobj_destroy(uobj, attrs); if (ret) { rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); return ERR_PTR(ret); @@ -249,7 +250,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, * (negative errno on failure). For use by callers that do not need the uobj. */ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - const struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; @@ -296,25 +297,13 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, static int idr_add_uobj(struct ib_uobject *uobj) { - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&uobj->ufile->idr_lock); - - /* - * We start with allocating an idr pointing to NULL. This represents an - * object which isn't initialized yet. 
We'll replace it later on with - * the real object once we commit. - */ - ret = idr_alloc(&uobj->ufile->idr, NULL, 0, - min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&uobj->ufile->idr_lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; + /* + * We start with allocating an idr pointing to NULL. This represents an + * object which isn't initialized yet. We'll replace it later on with + * the real object once we commit. + */ + return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b, + GFP_KERNEL); } /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ @@ -324,29 +313,20 @@ lookup_get_idr_uobject(const struct uverbs_api_object *obj, enum rdma_lookup_mode mode) { struct ib_uobject *uobj; - unsigned long idrno = id; if (id < 0 || id > ULONG_MAX) return ERR_PTR(-EINVAL); rcu_read_lock(); - /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ufile->idr, idrno); - if (!uobj) { - uobj = ERR_PTR(-ENOENT); - goto free; - } - /* * The idr_find is guaranteed to return a pointer to something that * isn't freed yet, or NULL, as the free after idr_remove goes through * kfree_rcu(). However the object may still have been released and * kfree() could be called at any time. */ - if (!kref_get_unless_zero(&uobj->ref)) + uobj = xa_load(&ufile->idr, id); + if (!uobj || !kref_get_unless_zero(&uobj->ref)) uobj = ERR_PTR(-ENOENT); - -free: rcu_read_unlock(); return uobj; } @@ -393,12 +373,13 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, - enum rdma_lookup_mode mode) + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; - if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) { + if (obj == ERR_PTR(-ENOMSG)) { /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode); if (IS_ERR(uobj)) @@ -431,6 +412,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, ret = uverbs_try_lock_object(uobj, mode); if (ret) goto free; + if (attrs) + attrs->context = uobj->context; return uobj; free: @@ -438,38 +421,6 @@ free: uverbs_uobject_put(uobj); return ERR_PTR(ret); } -struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj; - - uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile, - object_id, UVERBS_LOOKUP_READ); - if (IS_ERR(uobj)) - return uobj; - - attrs->context = uobj->context; - - return uobj; -} - -struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj; - - uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile, - object_id, UVERBS_LOOKUP_WRITE); - - if (IS_ERR(uobj)) - return uobj; - - attrs->context = uobj->context; - - return uobj; -} static struct ib_uobject * alloc_begin_idr_uobject(const struct uverbs_api_object *obj, @@ -489,14 +440,12 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj, ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) - goto idr_remove; + goto remove; return uobj; -idr_remove: - spin_lock(&ufile->idr_lock); - idr_remove(&ufile->idr, uobj->id); - spin_unlock(&ufile->idr_lock); +remove: + xa_erase(&ufile->idr, uobj->id); uobj_put: 
uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -526,7 +475,8 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj, } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *attrs) { struct ib_uobject *ret; @@ -546,6 +496,8 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, up_read(&ufile->hw_destroy_rwsem); return ret; } + if (attrs) + attrs->context = ret->context; return ret; } @@ -554,18 +506,17 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj) ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); + xa_erase(&uobj->ufile->idr, uobj->id); } static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { const struct uverbs_obj_idr_type *idr_type = container_of(uobj->uapi_object->type_attrs, struct uverbs_obj_idr_type, type); - int ret = idr_type->destroy_object(uobj, why); + int ret = idr_type->destroy_object(uobj, why, attrs); /* * We can only fail gracefully if the user requested to destroy the @@ -586,9 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, static void remove_handle_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); + xa_erase(&uobj->ufile->idr, uobj->id); /* Matches the kref in alloc_commit_idr_uobject */ uverbs_uobject_put(uobj); } @@ -599,7 +548,8 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj) } static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); @@ -618,17 +568,17 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj) static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; + void *old; - spin_lock(&ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. * - * NOTE: Once we set the IDR we loose ownership of our kref on uobj. + * NOTE: Storing the uobj transfers our kref on uobj to the XArray. * It will be put by remove_commit_idr_uobject() */ - WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); - spin_unlock(&ufile->idr_lock); + old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); + WARN_ON(old != NULL); return 0; } @@ -675,15 +625,16 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * caller can no longer assume uobj is valid. If this function fails it * destroys the uboject, including the attached HW object. 
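Taken together, the rdma_core.c hunks above keep uverbs' two-phase object creation but move it onto the XArray: xa_alloc() in idr_add_uobj() reserves an ID with a NULL entry so concurrent lookups still miss, and only alloc_commit_idr_uobject() publishes the finished object with xa_store(); an abort simply erases the reserved slot. A minimal standalone model of that reserve-then-publish pattern, with plain arrays and no locking, purely to show the state machine rather than the kernel API:

    #include <stdio.h>
    #include <stddef.h>

    #define NSLOTS 8

    static void *table[NSLOTS];
    static int   used[NSLOTS];

    /* Phase 1: reserve an ID, but leave the slot NULL so lookups still miss. */
    static int alloc_begin(void)
    {
        for (int id = 0; id < NSLOTS; id++) {
            if (!used[id]) {
                used[id] = 1;
                table[id] = NULL;
                return id;
            }
        }
        return -1;
    }

    /* Phase 2a: publish the fully built object; only now can lookup() see it. */
    static void alloc_commit(int id, void *obj) { table[id] = obj; }

    /* Phase 2b: construction failed after reservation; give the ID back. */
    static void alloc_abort(int id) { used[id] = 0; table[id] = NULL; }

    static void *lookup(int id)
    {
        return (id >= 0 && id < NSLOTS) ? table[id] : NULL;
    }

    int main(void)
    {
        int obj = 42;
        int ok = alloc_begin();
        int failed = alloc_begin();

        printf("reserved, not visible yet: %p\n", lookup(ok));   /* NULL */

        alloc_commit(ok, &obj);      /* construction succeeded */
        alloc_abort(failed);         /* construction failed     */

        printf("committed: %p\n", lookup(ok));
        printf("aborted:   %p\n", lookup(failed));
        return 0;
    }

In the kernel version the publish step also hands the caller's kref over to the XArray (see the NOTE in alloc_commit_idr_uobject()), so a successful rdma_alloc_commit_uobject() must not be followed by another put.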
*/ -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; int ret; /* alloc_commit consumes the uobj kref */ ret = uobj->uapi_object->type_class->alloc_commit(uobj); if (ret) { - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); up_read(&ufile->hw_destroy_rwsem); return ret; } @@ -707,12 +658,13 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. */ -void rdma_alloc_abort_uobject(struct ib_uobject *uobj) +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = uobj->ufile; uobj->object = NULL; - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); @@ -760,29 +712,28 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile) { - spin_lock_init(&ufile->idr_lock); - idr_init(&ufile->idr); + xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC); } void release_ufile_idr_uobject(struct ib_uverbs_file *ufile) { struct ib_uobject *entry; - int id; + unsigned long id; /* * At this point uverbs_cleanup_ufile() is guaranteed to have run, and - * there are no HW objects left, however the IDR is still populated + * there are no HW objects left, however the xarray is still populated * with anything that has not been cleaned up by userspace. Since the * kref on ufile is 0, nothing is allowed to call lookup_get. * * This is an optimized equivalent to remove_handle_idr_uobject */ - idr_for_each_entry(&ufile->idr, entry, id) { + xa_for_each(&ufile->idr, id, entry) { WARN_ON(entry->object); uverbs_uobject_put(entry); } - idr_destroy(&ufile->idr); + xa_destroy(&ufile->idr); } const struct uverbs_obj_type_class uverbs_idr_class = { @@ -814,6 +765,10 @@ void uverbs_close_fd(struct file *f) { struct ib_uobject *uobj = f->private_data; struct ib_uverbs_file *ufile = uobj->ufile; + struct uverbs_attr_bundle attrs = { + .context = uobj->context, + .ufile = ufile, + }; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { /* @@ -823,7 +778,7 @@ void uverbs_close_fd(struct file *f) * write lock here, or we have a kernel bug. */ WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)); - uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs); up_read(&ufile->hw_destroy_rwsem); } @@ -872,6 +827,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, { struct ib_uobject *obj, *next_obj; int ret = -EINVAL; + struct uverbs_attr_bundle attrs = { .ufile = ufile }; /* * This shouldn't run while executing other commands on this @@ -883,12 +839,13 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * other threads (which might still use the FDs) chance to run. */ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { + attrs.context = obj->context; /* * if we hit this WARN_ON, that means we are * racing with a lookup_get. 
*/ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); - if (!uverbs_destroy_uobject(obj, reason)) + if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); @@ -967,26 +924,25 @@ const struct uverbs_obj_type_class uverbs_fd_class = { EXPORT_SYMBOL(uverbs_fd_class); struct ib_uobject * -uverbs_get_uobject_from_file(u16 object_id, - struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, s64 id) +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs) { const struct uverbs_api_object *obj = - uapi_get_object(ufile->device->uapi, object_id); + uapi_get_object(attrs->ufile->device->uapi, object_id); switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_READ); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_READ, attrs); case UVERBS_ACCESS_DESTROY: /* Actual destruction is done inside uverbs_handle_method */ - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_DESTROY); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_DESTROY, attrs); case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_WRITE); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_WRITE, attrs); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(obj, ufile); + return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); @@ -994,8 +950,8 @@ uverbs_get_uobject_from_file(u16 object_id, } int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit) + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) { int ret = 0; @@ -1018,9 +974,9 @@ int uverbs_finalize_object(struct ib_uobject *uobj, break; case UVERBS_ACCESS_NEW: if (commit) - ret = rdma_alloc_commit_uobject(uobj); + ret = rdma_alloc_commit_uobject(uobj, attrs); else - rdma_alloc_abort_uobject(uobj); + rdma_alloc_abort_uobject(uobj, attrs); break; default: WARN_ON(true); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 69f8db66925e..5445323629b5 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -48,7 +48,7 @@ struct ib_uverbs_device; void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason); -int uobj_destroy(struct ib_uobject *uobj); +int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -83,9 +83,8 @@ void uverbs_close_fd(struct file *f); * uverbs_finalize_objects are called. */ struct ib_uobject * -uverbs_get_uobject_from_file(u16 object_id, - struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, s64 id); +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs); /* * Note that certain finalize stages could return a status: @@ -103,8 +102,8 @@ uverbs_get_uobject_from_file(u16 object_id, * object. 
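The lookup side converted earlier in this file, lookup_get_idr_uobject(), pairs xa_load() under rcu_read_lock() with kref_get_unless_zero(): the pointer may still be reachable while the final put is running, so a lookup that cannot raise the refcount from a non-zero value must fail with -ENOENT instead of resurrecting the object. A small C11-atomics model of that acquire-if-still-live step (an ordinary CAS loop, not the kernel's kref implementation):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Take a reference only if the count is still non-zero, mirroring
     * kref_get_unless_zero(): a lookup racing with the final put fails. */
    static int get_unless_zero(atomic_int *refs)
    {
        int old = atomic_load(refs);

        while (old != 0) {
            if (atomic_compare_exchange_weak(refs, &old, old + 1))
                return 1;
            /* old was reloaded by the failed CAS; re-check and retry. */
        }
        return 0;
    }

    int main(void)
    {
        atomic_int live = 2, dying = 0;

        printf("live:  %d\n", get_unless_zero(&live));   /* 1: reference taken */
        printf("dying: %d\n", get_unless_zero(&dying));  /* 0: caller sees -ENOENT */
        return 0;
    }

The kfree_rcu() mentioned in the lookup comment is what makes it safe to even inspect the pointer returned by xa_load() inside the RCU read-side section before this check runs.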
*/ int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index bb534959abf0..7d8071c7e564 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -40,7 +40,7 @@ #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/kref.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/workqueue.h> #include <uapi/linux/if_ether.h> #include <rdma/ib_pack.h> @@ -183,8 +183,7 @@ static struct ib_client sa_client = { .remove = ib_sa_remove_one }; -static DEFINE_SPINLOCK(idr_lock); -static DEFINE_IDR(query_idr); +static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static DEFINE_SPINLOCK(tid_lock); static u32 tid; @@ -1180,14 +1179,14 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; - spin_lock_irqsave(&idr_lock, flags); - if (idr_find(&query_idr, id) != query) { - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + if (xa_load(&queries, id) != query) { + xa_unlock_irqrestore(&queries, flags); return; } agent = query->port->agent; mad_buf = query->mad_buf; - spin_unlock_irqrestore(&idr_lock, flags); + xa_unlock_irqrestore(&queries, flags); /* * If the query is still on the netlink request list, schedule @@ -1363,21 +1362,14 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, gfp_t gfp_mask) { - bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; - if (preload) - idr_preload(gfp_mask); - spin_lock_irqsave(&idr_lock, flags); - - id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&idr_lock, flags); - if (preload) - idr_preload_end(); - if (id < 0) - return id; + xa_lock_irqsave(&queries, flags); + ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask); + xa_unlock_irqrestore(&queries, flags); + if (ret < 0) + return ret; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; @@ -1394,9 +1386,9 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, id); - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + __xa_erase(&queries, id); + xa_unlock_irqrestore(&queries, flags); } /* @@ -2188,9 +2180,9 @@ static void send_handler(struct ib_mad_agent *agent, break; } - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + __xa_erase(&queries, query->id); + xa_unlock_irqrestore(&queries, flags); free_mad(query); if (query->client) @@ -2475,5 +2467,5 @@ void ib_sa_cleanup(void) destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); - idr_destroy(&query_idr); + WARN_ON(!xa_empty(&queries)); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9b6a065bdfa5..c78d0c9646ae 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -349,10 +349,15 @@ static struct attribute *port_default_attrs[] = { static 
size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { - if (!gid_attr->ndev) - return -EINVAL; - - return sprintf(buf, "%s\n", gid_attr->ndev->name); + struct net_device *ndev; + size_t ret = -EINVAL; + + rcu_read_lock(); + ndev = rcu_dereference(gid_attr->ndev); + if (ndev) + ret = sprintf(buf, "%s\n", ndev->name); + rcu_read_unlock(); + return ret; } static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) @@ -1015,8 +1020,10 @@ err_free_stats: return; } -static int add_port(struct ib_device *device, int port_num) +static int add_port(struct ib_core_device *coredev, int port_num) { + struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); + bool is_full_dev = &device->coredev == coredev; struct ib_port *p; struct ib_port_attr attr; int i; @@ -1034,7 +1041,7 @@ static int add_port(struct ib_device *device, int port_num) p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, - device->ports_kobj, + coredev->ports_kobj, "%d", port_num); if (ret) { kfree(p); @@ -1055,7 +1062,7 @@ static int add_port(struct ib_device *device, int port_num) goto err_put; } - if (device->ops.process_mad) { + if (device->ops.process_mad && is_full_dev) { p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) @@ -1111,7 +1118,7 @@ static int add_port(struct ib_device *device, int port_num) if (ret) goto err_free_pkey; - if (device->ops.init_port) { + if (device->ops.init_port && is_full_dev) { ret = device->ops.init_port(device, port_num, &p->kobj); if (ret) goto err_remove_pkey; @@ -1122,10 +1129,10 @@ static int add_port(struct ib_device *device, int port_num) * port, so holder should be device. Therefore skip per port conunter * initialization. */ - if (device->ops.alloc_hw_stats && port_num) + if (device->ops.alloc_hw_stats && port_num && is_full_dev) setup_hw_stats(device, p, port_num); - list_add_tail(&p->kobj.entry, &device->port_list); + list_add_tail(&p->kobj.entry, &coredev->port_list); kobject_uevent(&p->kobj, KOBJ_ADD); return 0; @@ -1194,6 +1201,7 @@ static ssize_t node_type_show(struct device *device, case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); + case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); @@ -1279,11 +1287,11 @@ const struct attribute_group ib_dev_attr_group = { .attrs = ib_dev_attrs, }; -static void ib_free_port_attrs(struct ib_device *device) +void ib_free_port_attrs(struct ib_core_device *coredev) { struct kobject *p, *t; - list_for_each_entry_safe(p, t, &device->port_list, entry) { + list_for_each_entry_safe(p, t, &coredev->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); @@ -1303,20 +1311,22 @@ static void ib_free_port_attrs(struct ib_device *device) kobject_put(p); } - kobject_put(device->ports_kobj); + kobject_put(coredev->ports_kobj); } -static int ib_setup_port_attrs(struct ib_device *device) +int ib_setup_port_attrs(struct ib_core_device *coredev) { + struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); unsigned int port; int ret; - device->ports_kobj = 
kobject_create_and_add("ports", &device->dev.kobj); - if (!device->ports_kobj) + coredev->ports_kobj = kobject_create_and_add("ports", + &coredev->dev.kobj); + if (!coredev->ports_kobj) return -ENOMEM; rdma_for_each_port (device, port) { - ret = add_port(device, port); + ret = add_port(coredev, port); if (ret) goto err_put; } @@ -1324,7 +1334,7 @@ static int ib_setup_port_attrs(struct ib_device *device) return 0; err_put: - ib_free_port_attrs(device); + ib_free_port_attrs(coredev); return ret; } @@ -1332,7 +1342,7 @@ int ib_device_register_sysfs(struct ib_device *device) { int ret; - ret = ib_setup_port_attrs(device); + ret = ib_setup_port_attrs(&device->coredev); if (ret) return ret; @@ -1348,5 +1358,48 @@ void ib_device_unregister_sysfs(struct ib_device *device) free_hsag(&device->dev.kobj, device->hw_stats_ag); kfree(device->hw_stats); - ib_free_port_attrs(device); + ib_free_port_attrs(&device->coredev); +} + +/** + * ib_port_register_module_stat - add module counters under relevant port + * of IB device. + * + * @device: IB device to add counters + * @port_num: valid port number + * @kobj: pointer to the kobject to initialize + * @ktype: pointer to the ktype for this kobject. + * @name: the name of the kobject + */ +int ib_port_register_module_stat(struct ib_device *device, u8 port_num, + struct kobject *kobj, struct kobj_type *ktype, + const char *name) +{ + struct kobject *p, *t; + int ret; + + list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) { + struct ib_port *port = container_of(p, struct ib_port, kobj); + + if (port->port_num != port_num) + continue; + + ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s", + name); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL(ib_port_register_module_stat); + +/** + * ib_port_unregister_module_stat - release module counters + * @kobj: pointer to the kobject to release + */ +void ib_port_unregister_module_stat(struct kobject *kobj) +{ + kobject_put(kobj); } +EXPORT_SYMBOL(ib_port_unregister_module_stat); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 65c3230f5663..8e7da2d41fd8 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -42,7 +42,7 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/cdev.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/mutex.h> #include <linux/slab.h> @@ -125,23 +125,22 @@ static struct ib_client ucm_client = { .remove = ib_ucm_remove_one }; -static DEFINE_MUTEX(ctx_id_mutex); -static DEFINE_IDR(ctx_id_table); +static DEFINE_XARRAY_ALLOC(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, id); + xa_lock(&ctx_id_table); + ctx = xa_load(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); - mutex_unlock(&ctx_id_mutex); + xa_unlock(&ctx_id_table); return ctx; } @@ -194,10 +193,7 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) ctx->file = file; INIT_LIST_HEAD(&ctx->events); - mutex_lock(&ctx_id_mutex); - ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&ctx_id_mutex); - if (ctx->id < 0) + if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; list_add_tail(&ctx->file_list, &file->ctxs); @@ -514,9 +510,7 @@ static ssize_t ib_ucm_create_id(struct 
ib_ucm_file *file, err2: ib_destroy_cm_id(ctx->cm_id); err1: - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); + xa_erase(&ctx_id_table, ctx->id); kfree(ctx); return result; } @@ -536,15 +530,15 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, cmd.id); + xa_lock(&ctx_id_table); + ctx = xa_load(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); + __xa_erase(&ctx_id_table, ctx->id); + xa_unlock(&ctx_id_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1189,10 +1183,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); - + xa_erase(&ctx_id_table, ctx->id); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); @@ -1352,7 +1343,7 @@ static void __exit ib_ucm_cleanup(void) class_remove_file(&cm_class, &class_attr_abi_version.attr); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); - idr_destroy(&ctx_id_table); + WARN_ON(!xa_empty(&ctx_id_table)); } module_init(ib_ucm_init); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe5551562dbc..0a23048db523 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,27 +37,23 @@ #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/export.h> -#include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/pagemap.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" - static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { - struct scatterlist *sg; + struct sg_page_iter sg_iter; struct page *page; - int i; if (umem->nmap > 0) - ib_dma_unmap_sg(dev, umem->sg_head.sgl, - umem->npages, + ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents, DMA_BIDIRECTIONAL); - for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { - - page = sg_page(sg); + for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { + page = sg_page_iter_page(&sg_iter); if (!PageDirty(page) && umem->writable && dirty) set_page_dirty_lock(page); put_page(page); @@ -66,6 +62,124 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d sg_free_table(&umem->sg_head); } +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table + * + * sg: current scatterlist entry + * page_list: array of npage struct page pointers + * npages: number of pages in page_list + * max_seg_sz: maximum segment size in bytes + * nents: [out] number of entries in the scatterlist + * + * Return new end of scatterlist + */ +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, + struct page **page_list, + unsigned long npages, + unsigned int max_seg_sz, + int *nents) +{ + unsigned long first_pfn; + unsigned long i = 0; + bool update_cur_sg = false; + bool first = !sg_page(sg); + + /* Check if new page_list is contiguous with end of previous page_list. + * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0. 
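ib_umem_add_sg_table(), whose kernel-doc starts above and whose body continues below, folds the freshly pinned page array into as few scatterlist entries as possible: runs of physically contiguous pages are merged, but no entry may grow past the DMA device's maximum segment size. The same coalescing logic reduced to bare page-frame numbers; PAGE_SIZE and MAX_SEG_SZ here are illustrative stand-ins, not values taken from any driver:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE   4096UL
    #define MAX_SEG_SZ  (8 * PAGE_SIZE)   /* stand-in for dma_get_max_seg_size() */

    struct seg { unsigned long first_pfn; unsigned long nr_pages; };

    /* Coalesce runs of contiguous PFNs into segments no larger than MAX_SEG_SZ.
     * Returns the number of segments written to out[]. */
    static size_t coalesce(const unsigned long *pfn, size_t npages,
                           struct seg *out, size_t max_out)
    {
        size_t nseg = 0;

        for (size_t i = 0; i < npages && nseg < max_out; ) {
            unsigned long len = 1;

            while (i + len < npages &&
                   pfn[i + len] == pfn[i] + len &&
                   (len + 1) * PAGE_SIZE <= MAX_SEG_SZ)
                len++;

            out[nseg].first_pfn = pfn[i];
            out[nseg].nr_pages  = len;
            nseg++;
            i += len;
        }
        return nseg;
    }

    int main(void)
    {
        /* Three contiguous pages, a gap, then two more contiguous pages. */
        unsigned long pfn[] = { 100, 101, 102, 200, 201 };
        struct seg seg[8];
        size_t n = coalesce(pfn, 5, seg, 8);

        for (size_t i = 0; i < n; i++)
            printf("seg %zu: pfn %lu, %lu pages\n", i,
                   seg[i].first_pfn, seg[i].nr_pages);
        return 0;
    }

The in-kernel helper additionally tries to extend the last entry left over from the previous get_user_pages_longterm() batch (the update_cur_sg case) so that a contiguous pinning is not split at batch boundaries.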
+ */ + if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) == + page_to_pfn(page_list[0]))) + update_cur_sg = true; + + while (i != npages) { + unsigned long len; + struct page *first_page = page_list[i]; + + first_pfn = page_to_pfn(first_page); + + /* Compute the number of contiguous pages we have starting + * at i + */ + for (len = 0; i != npages && + first_pfn + len == page_to_pfn(page_list[i]) && + len < (max_seg_sz >> PAGE_SHIFT); + len++) + i++; + + /* Squash N contiguous pages from page_list into current sge */ + if (update_cur_sg) { + if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) { + sg_set_page(sg, sg_page(sg), + sg->length + (len << PAGE_SHIFT), + 0); + update_cur_sg = false; + continue; + } + update_cur_sg = false; + } + + /* Squash N contiguous pages into next sge or first sge */ + if (!first) + sg = sg_next(sg); + + (*nents)++; + sg_set_page(sg, first_page, len << PAGE_SHIFT, 0); + first = false; + } + + return sg; +} + +/** + * ib_umem_find_best_pgsz - Find best HW page size to use for this MR + * + * @umem: umem struct + * @pgsz_bitmap: bitmap of HW supported page sizes + * @virt: IOVA + * + * This helper is intended for HW that support multiple page + * sizes but can do only a single page size in an MR. + * + * Returns 0 if the umem requires page sizes not supported by + * the driver to be mapped. Drivers always supporting PAGE_SIZE + * or smaller will never see a 0 result. + */ +unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) +{ + struct scatterlist *sg; + unsigned int best_pg_bit; + unsigned long va, pgoff; + dma_addr_t mask; + int i; + + /* At minimum, drivers must support PAGE_SIZE or smaller */ + if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0)))) + return 0; + + va = virt; + /* max page size not to exceed MR length */ + mask = roundup_pow_of_two(umem->length); + /* offset into first SGL */ + pgoff = umem->address & ~PAGE_MASK; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + /* Walk SGL and reduce max page size if VA/PA bits differ + * for any address. + */ + mask |= (sg_dma_address(sg) + pgoff) ^ va; + if (i && i != (umem->nmap - 1)) + /* restrict by length as well for interior SGEs */ + mask |= sg_dma_len(sg); + va += sg_dma_len(sg) - pgoff; + pgoff = 0; + } + best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); + + return BIT_ULL(best_pg_bit); +} +EXPORT_SYMBOL(ib_umem_find_best_pgsz); + /** * ib_umem_get - Pin and DMA map userspace memory. 
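ib_umem_find_best_pgsz() just above reduces the whole mapping to a single alignment mask: every place where the IOVA and the DMA address disagree, the length of every interior segment, and the rounded-up MR length are all ORed in, and the best HW page size is then the largest supported size that still divides that mask. A simplified standalone model of that final selection step; rdma_find_pg_bit() itself lives in the rdma headers, so this only mirrors the idea, not the exact helper:

    #include <stdint.h>
    #include <stdio.h>

    /* Pick the largest supported page size that still divides `mask`, i.e. the
     * largest size to which everything folded into mask is aligned.
     * pgsz_bitmap has one bit per supported size (bit n => 1 << n bytes). */
    static uint64_t best_page_size(uint64_t mask, uint64_t pgsz_bitmap)
    {
        /* Alignment of mask is its lowest set bit; any page size at or below
         * that alignment works, anything larger straddles a misalignment. */
        uint64_t align = mask & -mask;
        uint64_t usable = pgsz_bitmap & (align ? (align | (align - 1)) : ~0ULL);

        if (!usable)
            return 0;
        /* Highest usable bit is the largest acceptable page size. */
        return 1ULL << (63 - __builtin_clzll(usable));
    }

    int main(void)
    {
        uint64_t supported = (1ULL << 12) | (1ULL << 21) | (1ULL << 30); /* 4K/2M/1G */

        /* Everything 2 MiB aligned and 2 MiB sized: 2 MiB pages are usable. */
        printf("0x%llx\n", (unsigned long long)best_page_size(1ULL << 21, supported));
        /* A 4 KiB misalignment anywhere in the SGL forces 4 KiB pages. */
        printf("0x%llx\n", (unsigned long long)best_page_size(1ULL << 12, supported));
        return 0;
    }

As the kernel-doc says, the helper targets hardware that supports several page sizes but only one per MR; such drivers feed their supported-size bitmap in here instead of open-coding the SGL walk.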
* @@ -84,16 +198,14 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct ib_ucontext *context; struct ib_umem *umem; struct page **page_list; - struct vm_area_struct **vma_list; unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; struct mm_struct *mm; unsigned long npages; int ret; - int i; unsigned long dma_attrs = 0; - struct scatterlist *sg, *sg_list_start; + struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; if (!udata) @@ -138,29 +250,23 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, mmgrab(mm); if (access & IB_ACCESS_ON_DEMAND) { + if (WARN_ON_ONCE(!context->invalidate_range)) { + ret = -EINVAL; + goto umem_kfree; + } + ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); if (ret) goto umem_kfree; return umem; } - /* We assume the memory is from hugetlb until proved otherwise */ - umem->hugetlb = 1; - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { ret = -ENOMEM; goto umem_kfree; } - /* - * if we can't alloc the vma_list, it's not so bad; - * just assume the memory is not hugetlb memory - */ - vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL); - if (!vma_list) - umem->hugetlb = 0; - npages = ib_umem_num_pages(umem); if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; @@ -185,41 +291,34 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!umem->writable) gup_flags |= FOLL_FORCE; - sg_list_start = umem->sg_head.sgl; + sg = umem->sg_head.sgl; while (npages) { down_read(&mm->mmap_sem); ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - gup_flags, page_list, vma_list); + gup_flags, page_list, NULL); if (ret < 0) { up_read(&mm->mmap_sem); goto umem_release; } - umem->npages += ret; cur_base += ret * PAGE_SIZE; npages -= ret; - /* Continue to hold the mmap_sem as vma_list access - * needs to be protected. 
- */ - for_each_sg(sg_list_start, sg, ret, i) { - if (vma_list && !is_vm_hugetlb_page(vma_list[i])) - umem->hugetlb = 0; + sg = ib_umem_add_sg_table(sg, page_list, ret, + dma_get_max_seg_size(context->device->dma_device), + &umem->sg_nents); - sg_set_page(sg, page_list[i], PAGE_SIZE, 0); - } up_read(&mm->mmap_sem); - - /* preparing for next loop */ - sg_list_start = sg; } + sg_mark_end(sg); + umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, - umem->npages, + umem->sg_nents, DMA_BIDIRECTIONAL, dma_attrs); @@ -236,8 +335,6 @@ umem_release: vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: - if (vma_list) - free_page((unsigned long) vma_list); free_page((unsigned long) page_list); umem_kfree: if (ret) { @@ -315,7 +412,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return -EINVAL; } - ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length, + ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length, offset + ib_umem_offset(umem)); if (ret < 0) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e6ec79ad9cc8..c7226cf52acc 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -241,7 +241,7 @@ static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, per_mm->mm = mm; per_mm->umem_tree = RB_ROOT_CACHED; init_rwsem(&per_mm->umem_rwsem); - per_mm->active = ctx->invalidate_range; + per_mm->active = true; rcu_read_lock(); per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); @@ -417,9 +417,6 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) h = hstate_vma(vma); umem->page_shift = huge_page_shift(h); up_read(&mm->mmap_sem); - umem->hugetlb = 1; - } else { - umem->hugetlb = 0; } mutex_init(&umem_odp->umem_mutex); @@ -503,7 +500,6 @@ static int ib_umem_odp_map_dma_single_page( struct ib_umem *umem = &umem_odp->umem; struct ib_device *dev = umem->context->device; dma_addr_t dma_addr; - int stored_page = 0; int remove_existing_mapping = 0; int ret = 0; @@ -527,8 +523,7 @@ static int ib_umem_odp_map_dma_single_page( } umem_odp->dma_list[page_index] = dma_addr | access_mask; umem_odp->page_list[page_index] = page; - umem->npages++; - stored_page = 1; + umem_odp->npages++; } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { @@ -540,11 +535,9 @@ static int ib_umem_odp_map_dma_single_page( } out: - /* On Demand Paging - avoid pinning the page */ - if (umem->context->invalidate_range || !stored_page) - put_page(page); + put_page(page); - if (remove_existing_mapping && umem->context->invalidate_range) { + if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); umem->context->invalidate_range( umem_odp, @@ -754,12 +747,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, */ set_page_dirty(head_page); } - /* on demand pinning support */ - if (!umem->context->invalidate_range) - put_page(page); umem_odp->page_list[idx] = NULL; umem_odp->dma_list[idx] = 0; - umem->npages--; + umem_odp->npages--; } } mutex_unlock(&umem_odp->umem_mutex); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b58b07c03cfb..671f07ba1fad 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -129,6 +129,9 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +#define CREATE_TRACE_POINTS +#include <trace/events/ib_umad.h> + static const dev_t base_umad_dev = 
MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) + IB_UMAD_NUM_FIXED_MINOR; @@ -334,6 +337,9 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, return -EFAULT; } } + + trace_ib_umad_read_recv(file, &packet->mad.hdr, &recv_buf->mad->mad_hdr); + return hdr_size(file) + packet->length; } @@ -353,6 +359,9 @@ static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; + trace_ib_umad_read_send(file, &packet->mad.hdr, + (struct ib_mad_hdr *)&packet->mad.data); + return size; } @@ -508,6 +517,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, mutex_lock(&file->mutex); + trace_ib_umad_write(file, &packet->mad.hdr, + (struct ib_mad_hdr *)&packet->mad.data); + agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; @@ -968,6 +980,11 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } + if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto out; + } + file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; @@ -1061,6 +1078,11 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } } + if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto err_up_sem; + } + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) goto err_up_sem; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 32cc8fe7902f..1e5aeb39f774 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -162,9 +162,7 @@ struct ib_uverbs_file { struct list_head umaps; struct page *disassociate_page; - struct idr idr; - /* spinlock protects write access to idr */ - spinlock_t idr_lock; + struct xarray idr; }; struct ib_uverbs_event { @@ -241,7 +239,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 062a86c04123..5a3a1780ceea 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -162,7 +162,7 @@ static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, const void __user *res = iter->cur; if (iter->cur + len > iter->end) - return ERR_PTR(-ENOSPC); + return (void __force __user *)ERR_PTR(-ENOSPC); iter->cur += len; return res; } @@ -175,7 +175,7 @@ static int uverbs_request_finish(struct uverbs_req_iter *iter) } static struct ib_uverbs_completion_event_file * -_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs) +_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, fd, attrs); @@ -230,6 +230,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) goto err_alloc; } + attrs->context = ucontext; + ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; ucontext->cg_obj = cg_obj; @@ -423,7 +425,7 @@ static int ib_uverbs_alloc_pd(struct 
uverbs_attr_bundle *attrs) atomic_set(&pd->usecnt, 0); pd->res.type = RDMA_RESTRACK_PD; - ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata); + ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; @@ -436,15 +438,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (ret) goto err_copy; - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dealloc_pd(pd); + ib_dealloc_pd_user(pd, &attrs->driver_udata); pd = NULL; err_alloc: kfree(pd); err: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -594,8 +596,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) } if (!xrcd) { - xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context, - &attrs->driver_udata); + xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -633,7 +634,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject); + return uobj_alloc_commit(&obj->uobject, attrs); err_copy: if (inode) { @@ -643,10 +644,10 @@ err_copy: } err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd); + ib_dealloc_xrcd(xrcd, &attrs->driver_udata); err: - uobj_alloc_abort(&obj->uobject); + uobj_alloc_abort(&obj->uobject, attrs); err_tree_mutex_unlock: if (f.file) @@ -669,19 +670,19 @@ static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs); } -int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, - struct ib_xrcd *xrcd, - enum rdma_remove_reason why) +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct inode *inode; int ret; - struct ib_uverbs_device *dev = uobject->context->ufile->device; + struct ib_uverbs_device *dev = attrs->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return 0; - ret = ib_dealloc_xrcd(xrcd); + ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); @@ -763,16 +764,16 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dereg_mr(mr); + ib_dereg_mr_user(mr, &attrs->driver_udata); err_put: uobj_put_obj_read(pd); err_free: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -917,14 +918,14 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: uverbs_dealloc_mw(mw); err_put: uobj_put_obj_read(pd); err_free: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -965,11 +966,11 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) { - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); } static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, @@ -1009,8 +1010,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, attr.comp_vector = cmd->comp_vector; attr.flags = cmd->flags; - cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, - 
&attrs->driver_udata); + cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1036,7 +1036,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject); + ret = uobj_alloc_commit(&obj->uobject, attrs); if (ret) return ERR_PTR(ret); return obj; @@ -1049,7 +1049,7 @@ err_file: ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); err: - uobj_alloc_abort(&obj->uobject); + uobj_alloc_abort(&obj->uobject, attrs); return ERR_PTR(ret); } @@ -1418,7 +1418,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->real_qp = qp; qp->pd = pd; qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; @@ -1477,7 +1476,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_cb: ib_destroy_qp(qp); @@ -1495,7 +1494,7 @@ err_put: if (ind_tbl) uobj_put_obj_read(ind_tbl); - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } @@ -1609,14 +1608,14 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_destroy: ib_destroy_qp(qp); err_xrcd: uobj_put_read(xrcd_uobj); err_put: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } @@ -2451,7 +2450,7 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); @@ -2460,7 +2459,7 @@ err_put: uobj_put_obj_read(pd); err: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -2962,16 +2961,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_wq(wq); + ib_destroy_wq(wq, &attrs->driver_udata); err_put_cq: uobj_put_obj_read(cq); err_put_pd: uobj_put_obj_read(pd); err_uobj: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return err; } @@ -3136,12 +3135,12 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); @@ -3314,7 +3313,7 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3325,7 +3324,7 @@ err_free_flow_attr: err_put: uobj_put_obj_read(qp); err_uobj: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3411,9 +3410,9 @@ static int __uverbs_create_xsrq(struct 
uverbs_attr_bundle *attrs, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->ops.create_srq(pd, &attr, udata); - if (IS_ERR(srq)) { - ret = PTR_ERR(srq); + srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); + if (!srq) { + ret = -ENOMEM; goto err_put; } @@ -3424,6 +3423,10 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + ret = pd->device->ops.create_srq(srq, &attr, udata); + if (ret) + goto err_free; + if (ib_srq_has_cq(cmd->srq_type)) { srq->ext.cq = attr.ext.cq; atomic_inc(&attr.ext.cq->usecnt); @@ -3458,11 +3461,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_srq(srq); + ib_destroy_srq_user(srq, &attrs->driver_udata); +err_free: + kfree(srq); err_put: uobj_put_obj_read(pd); @@ -3477,7 +3482,7 @@ err_put_xrcd: } err: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index e1379949e663..829b0c6944d8 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -207,13 +207,12 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, for (i = 0; i != array_len; i++) { attr->uobjects[i] = uverbs_get_uobject_from_file( - spec->u2.objs_arr.obj_type, pbundle->bundle.ufile, - spec->u2.objs_arr.access, idr_vals[i]); + spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access, + idr_vals[i], &pbundle->bundle); if (IS_ERR(attr->uobjects[i])) { ret = PTR_ERR(attr->uobjects[i]); break; } - pbundle->bundle.context = attr->uobjects[i]->context; } attr->len = i; @@ -223,7 +222,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, struct uverbs_objs_arr_attr *attr, - bool commit) + bool commit, struct uverbs_attr_bundle *attrs) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; int current_ret; @@ -231,8 +230,9 @@ static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, size_t i; for (i = 0; i != attr->len; i++) { - current_ret = uverbs_finalize_object( - attr->uobjects[i], spec->u2.objs_arr.access, commit); + current_ret = uverbs_finalize_object(attr->uobjects[i], + spec->u2.objs_arr.access, + commit, attrs); if (!ret) ret = current_ret; } @@ -325,13 +325,10 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, * IDR implementation today rejects negative IDs */ o_attr->uobject = uverbs_get_uobject_from_file( - spec->u.obj.obj_type, - pbundle->bundle.ufile, - spec->u.obj.access, - uattr->data_s64); + spec->u.obj.obj_type, spec->u.obj.access, + uattr->data_s64, &pbundle->bundle); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); - pbundle->bundle.context = o_attr->uobject->context; __set_bit(attr_bkey, pbundle->uobj_finalize); if (spec->u.obj.access == UVERBS_ACCESS_NEW) { @@ -456,12 +453,14 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, uverbs_fill_udata(&pbundle->bundle, &pbundle->bundle.driver_udata, UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); + else + pbundle->bundle.driver_udata = (struct ib_udata){}; if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { struct uverbs_obj_attr *destroy_attr = 
&pbundle->bundle.attrs[destroy_bkey].obj_attr; - ret = uobj_destroy(destroy_attr->uobject); + ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle); if (ret) return ret; __clear_bit(destroy_bkey, pbundle->uobj_finalize); @@ -512,7 +511,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) current_ret = uverbs_finalize_object( attr->obj_attr.uobject, - attr->obj_attr.attr_elm->spec.u.obj.access, commit); + attr->obj_attr.attr_elm->spec.u.obj.access, commit, + &pbundle->bundle); if (!ret) ret = current_ret; } @@ -535,7 +535,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { current_ret = uverbs_free_idrs_array( - attr_uapi, &attr->objs_arr_attr, commit); + attr_uapi, &attr->objs_arr_attr, commit, + &pbundle->bundle); if (!ret) ret = current_ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8b43dd96d3b2..84a5e9a6d483 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -723,7 +723,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, * then the command request structure starts * with a '__aligned u64 response' member. */ - ret = get_user(response, (const u64 *)buf); + ret = get_user(response, (const u64 __user *)buf); if (ret) goto out_unlock; @@ -926,43 +926,32 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, }; -static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long size) +/* + * Map IO memory into a process. This is to be called by drivers as part of + * their mmap() functions if they wish to send something like PCI-E BAR memory + * to userspace. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot) { struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; if (!(vma->vm_flags & VM_SHARED)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (vma->vm_end - vma->vm_start != size) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Driver is using this wrong, must be called by ib_uverbs_mmap */ if (WARN_ON(!vma->vm_file || vma->vm_file->private_data != ufile)) - return ERR_PTR(-EINVAL); + return -EINVAL; lockdep_assert_held(&ufile->device->disassociate_srcu); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) - return ERR_PTR(-ENOMEM); - return priv; -} - -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. - */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); + return -ENOMEM; vma->vm_page_prot = prot; if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { @@ -975,35 +964,6 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, } EXPORT_SYMBOL(rdma_user_mmap_io); -/* - * The page case is here for a slightly different reason, the driver expects - * to be able to free the page it is sharing to user space when it destroys - * its ucontext, which means we need to zap the user space references. 
- * - * We could handle this differently by providing an API to allocate a shared - * page and then only freeing the shared page when the last ufile is - * destroyed. - */ -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); - - if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, - vma->vm_page_prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_page); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; @@ -1094,6 +1054,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) goto err; } + if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto err; + } + /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index f224cb727224..35b2e2c640cc 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -40,14 +40,17 @@ #include "uverbs.h" static int uverbs_free_ah(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { - return rdma_destroy_ah((struct ib_ah *)uobject->object, - RDMA_DESTROY_AH_SLEEPABLE); + return rdma_destroy_ah_user((struct ib_ah *)uobject->object, + RDMA_DESTROY_AH_SLEEPABLE, + &attrs->driver_udata); } static int uverbs_free_flow(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = @@ -66,13 +69,15 @@ static int uverbs_free_flow(struct ib_uobject *uobject, } static int uverbs_free_mw(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { return uverbs_dealloc_mw((struct ib_mw *)uobject->object); } static int uverbs_free_qp(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_qp *qp = uobject->object; struct ib_uqp_object *uqp = @@ -93,19 +98,20 @@ static int uverbs_free_qp(struct ib_uobject *uobject, ib_uverbs_detach_umcast(qp, uqp); } - ret = ib_destroy_qp(qp); + ret = ib_destroy_qp_user(qp, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); - ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent); + ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent); return ret; } static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; @@ -120,23 +126,25 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, } static int uverbs_free_wq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_wq *wq = uobject->object; struct ib_uwq_object *uwq = container_of(uobject, struct ib_uwq_object, uevent.uobject); int 
ret; - ret = ib_destroy_wq(wq); + ret = ib_destroy_wq(wq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent); return ret; } static int uverbs_free_srq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_srq *srq = uobject->object; struct ib_uevent_object *uevent = @@ -144,7 +152,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, enum ib_srq_type srq_type = srq->srq_type; int ret; - ret = ib_destroy_srq(srq); + ret = ib_destroy_srq_user(srq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; @@ -155,12 +163,13 @@ static int uverbs_free_srq(struct ib_uobject *uobject, atomic_dec(&us->uxrcd->refcnt); } - ib_uverbs_release_uevent(uobject->context->ufile, uevent); + ib_uverbs_release_uevent(attrs->ufile, uevent); return ret; } static int uverbs_free_xrcd(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_xrcd *xrcd = uobject->object; struct ib_uxrcd_object *uxrcd = @@ -171,15 +180,16 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, if (ret) return ret; - mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); - mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); + mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); + mutex_unlock(&attrs->ufile->device->xrcd_tree_mutex); return ret; } static int uverbs_free_pd(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; int ret; @@ -188,7 +198,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject, if (ret) return ret; - ib_dealloc_pd(pd); + ib_dealloc_pd_user(pd, &attrs->driver_udata); return 0; } diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 309c5e80988d..9f013304e677 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -31,11 +31,13 @@ * SOFTWARE. 
*/ +#include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> static int uverbs_free_counters(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_counters *counters = uobject->object; int ret; @@ -52,7 +54,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); - struct ib_device *ib_dev = uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; struct ib_counters *counters; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index a59ea89e3f2b..db5c46a1bb2d 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -35,7 +35,8 @@ #include "uverbs.h" static int uverbs_free_cq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_cq *cq = uobject->object; struct ib_uverbs_event_queue *ev_queue = cq->cq_context; @@ -43,12 +44,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject, container_of(uobject, struct ib_ucq_object, uobject); int ret; - ret = ib_destroy_cq(cq); + ret = ib_destroy_cq_user(cq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; ib_uverbs_release_ucq( - uobject->context->ufile, + attrs->ufile, ev_queue ? container_of(ev_queue, struct ib_uverbs_completion_event_file, ev_queue) : @@ -63,7 +64,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); - struct ib_device *ib_dev = obj->uobject.context->device; + struct ib_device *ib_dev = attrs->context->device; int ret; u64 user_handle; struct ib_cq_init_attr attr = {}; @@ -110,8 +111,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, - &attrs->driver_udata); + cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 2ef70637bee1..d5a1de33c2c9 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -30,11 +30,13 @@ * SOFTWARE. 
*/ +#include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> static int uverbs_free_dm(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; int ret; @@ -43,7 +45,7 @@ static int uverbs_free_dm(struct ib_uobject *uobject, if (ret) return ret; - return dm->device->ops.dealloc_dm(dm); + return dm->device->ops.dealloc_dm(dm, attrs); } static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( @@ -53,7 +55,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( struct ib_uobject *uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) ->obj_attr.uobject; - struct ib_device *ib_dev = uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; struct ib_dm *dm; int ret; @@ -70,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( if (ret) return ret; - dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs); + dm = ib_dev->ops.alloc_dm(ib_dev, attrs->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 4962b87fa600..459cf165b231 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -30,11 +30,13 @@ * SOFTWARE. */ +#include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> static int uverbs_free_flow_action(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; int ret; @@ -308,7 +310,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); - struct ib_device *ib_dev = uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; int ret; struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 4d4be0c2b752..610d3b9f7654 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -30,13 +30,16 @@ * SOFTWARE. 
*/ +#include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> static int uverbs_free_mr(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { - return ib_dereg_mr((struct ib_mr *)uobject->object); + return ib_dereg_mr_user((struct ib_mr *)uobject->object, + &attrs->driver_udata); } static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( @@ -145,7 +148,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return 0; err_dereg: - ib_dereg_mr(mr); + ib_dereg_mr_user(mr, &attrs->driver_udata); return ret; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5a5e83f5f0fc..e666a1f7608d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -218,6 +218,8 @@ rdma_node_get_transport(enum rdma_node_type node_type) return RDMA_TRANSPORT_USNIC_UDP; if (node_type == RDMA_NODE_RNIC) return RDMA_TRANSPORT_IWARP; + if (node_type == RDMA_NODE_UNSPECIFIED) + return RDMA_TRANSPORT_UNSPECIFIED; return RDMA_TRANSPORT_IB; } @@ -269,7 +271,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_set_task(&pd->res, caller); - ret = device->ops.alloc_pd(pd, NULL, NULL); + ret = device->ops.alloc_pd(pd, NULL); if (ret) { kfree(pd); return ERR_PTR(ret); @@ -316,17 +318,18 @@ EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. * @pd: The protection domain to deallocate. + * @udata: Valid user data or NULL for kernel object * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ -void ib_dealloc_pd(struct ib_pd *pd) +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; if (pd->__internal_mr) { - ret = pd->device->ops.dereg_mr(pd->__internal_mr); + ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -336,10 +339,10 @@ void ib_dealloc_pd(struct ib_pd *pd) WARN_ON(atomic_read(&pd->usecnt)); rdma_restrack_del(&pd->res); - pd->device->ops.dealloc_pd(pd); + pd->device->ops.dealloc_pd(pd, udata); kfree(pd); } -EXPORT_SYMBOL(ib_dealloc_pd); +EXPORT_SYMBOL(ib_dealloc_pd_user); /* Address handles */ @@ -495,25 +498,33 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata) { + struct ib_device *device = pd->device; struct ib_ah *ah; + int ret; might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!pd->device->ops.create_ah) + if (!device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); - ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata); + ah = rdma_zalloc_drv_obj_gfp( + device, ib_ah, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? 
GFP_KERNEL : GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; - ah->uobject = NULL; - ah->type = ah_attr->type; - ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + ah->device = device; + ah->pd = pd; + ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); - atomic_inc(&pd->usecnt); + ret = device->ops.create_ah(ah, ah_attr, flags, udata); + if (ret) { + kfree(ah); + return ERR_PTR(ret); } + atomic_inc(&pd->usecnt); return ah; } @@ -930,25 +941,24 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) } EXPORT_SYMBOL(rdma_query_ah); -int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; - int ret; might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; - ret = ah->device->ops.destroy_ah(ah, flags); - if (!ret) { - atomic_dec(&pd->usecnt); - if (sgid_attr) - rdma_put_gid_attr(sgid_attr); - } - return ret; + ah->device->ops.destroy_ah(ah, flags); + atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + + kfree(ah); + return 0; } -EXPORT_SYMBOL(rdma_destroy_ah); +EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ @@ -956,29 +966,40 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) { struct ib_srq *srq; + int ret; if (!pd->device->ops.create_srq) return ERR_PTR(-EOPNOTSUPP); - srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL); - - if (!IS_ERR(srq)) { - srq->device = pd->device; - srq->pd = pd; - srq->uobject = NULL; - srq->event_handler = srq_init_attr->event_handler; - srq->srq_context = srq_init_attr->srq_context; - srq->srq_type = srq_init_attr->srq_type; - if (ib_srq_has_cq(srq->srq_type)) { - srq->ext.cq = srq_init_attr->ext.cq; - atomic_inc(&srq->ext.cq->usecnt); - } - if (srq->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; - atomic_inc(&srq->ext.xrc.xrcd->usecnt); - } - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); + srq = rdma_zalloc_drv_obj(pd->device, ib_srq); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->device = pd->device; + srq->pd = pd; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); + + ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL); + if (ret) { + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); + return ERR_PTR(ret); } return srq; @@ -1003,36 +1024,23 @@ int ib_query_srq(struct ib_srq *srq, } EXPORT_SYMBOL(ib_query_srq); -int ib_destroy_srq(struct ib_srq *srq) +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { - struct ib_pd *pd; - enum ib_srq_type srq_type; - struct ib_xrcd *uninitialized_var(xrcd); - struct ib_cq *uninitialized_var(cq); - int ret; - if (atomic_read(&srq->usecnt)) return -EBUSY; - pd = srq->pd; - srq_type = srq->srq_type; - if (ib_srq_has_cq(srq_type)) - cq = srq->ext.cq; - if (srq_type == IB_SRQT_XRC) - xrcd = 
srq->ext.xrc.xrcd; + srq->device->ops.destroy_srq(srq, udata); - ret = srq->device->ops.destroy_srq(srq); - if (!ret) { - atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) - atomic_dec(&xrcd->usecnt); - if (ib_srq_has_cq(srq_type)) - atomic_dec(&cq->usecnt); - } + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); - return ret; + return 0; } -EXPORT_SYMBOL(ib_destroy_srq); +EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ @@ -1111,8 +1119,9 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, } EXPORT_SYMBOL(ib_open_qp); -static struct ib_qp *create_xrc_qp(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) { struct ib_qp *real_qp = qp; @@ -1134,8 +1143,9 @@ static struct ib_qp *create_xrc_qp(struct ib_qp *qp, return qp; } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_qp_user(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1164,7 +1174,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (ret) goto err; - qp->real_qp = qp; qp->qp_type = qp_init_attr->qp_type; qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; @@ -1176,7 +1185,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->port = 0; if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { - struct ib_qp *xrc_qp = create_xrc_qp(qp, qp_init_attr); + struct ib_qp *xrc_qp = + create_xrc_qp_user(qp, qp_init_attr, udata); if (IS_ERR(xrc_qp)) { ret = PTR_ERR(xrc_qp); @@ -1230,7 +1240,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_qp_user); static const struct { int valid; @@ -1837,7 +1847,7 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) return 0; } -int ib_destroy_qp(struct ib_qp *qp) +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) { const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; @@ -1869,7 +1879,7 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_rw_cleanup_mrs(qp); rdma_restrack_del(&qp->res); - ret = qp->device->ops.destroy_qp(qp); + ret = qp->device->ops.destroy_qp(qp, udata); if (!ret) { if (alt_path_sgid_attr) rdma_put_gid_attr(alt_path_sgid_attr); @@ -1894,7 +1904,7 @@ int ib_destroy_qp(struct ib_qp *qp) return ret; } -EXPORT_SYMBOL(ib_destroy_qp); +EXPORT_SYMBOL(ib_destroy_qp_user); /* Completion queues */ @@ -1907,7 +1917,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->ops.create_cq(device, cq_attr, NULL, NULL); + cq = device->ops.create_cq(device, cq_attr, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -1933,15 +1943,15 @@ int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) } EXPORT_SYMBOL(rdma_set_cq_moderation); -int ib_destroy_cq(struct ib_cq *cq) +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (atomic_read(&cq->usecnt)) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->ops.destroy_cq(cq); + return cq->device->ops.destroy_cq(cq, udata); } -EXPORT_SYMBOL(ib_destroy_cq); +EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { @@ -1952,14 +1962,14 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ 
-int ib_dereg_mr(struct ib_mr *mr) +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; int ret; rdma_restrack_del(&mr->res); - ret = mr->device->ops.dereg_mr(mr); + ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { atomic_dec(&pd->usecnt); if (dm) @@ -1968,13 +1978,14 @@ int ib_dereg_mr(struct ib_mr *mr) return ret; } -EXPORT_SYMBOL(ib_dereg_mr); +EXPORT_SYMBOL(ib_dereg_mr_user); /** * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. + * @udata: user data or null for kernel objects * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. @@ -1982,16 +1993,15 @@ EXPORT_SYMBOL(ib_dereg_mr); * max_num_sg * used_page_size. * */ -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct ib_mr *mr; if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; @@ -2005,7 +2015,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, return mr; } -EXPORT_SYMBOL(ib_alloc_mr); +EXPORT_SYMBOL(ib_alloc_mr_user); /* "Fast" memory regions */ @@ -2138,7 +2148,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, NULL, NULL); + xrcd = device->ops.alloc_xrcd(device, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; @@ -2151,7 +2161,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) } EXPORT_SYMBOL(__ib_alloc_xrcd); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct ib_qp *qp; int ret; @@ -2166,7 +2176,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return ret; } - return xrcd->device->ops.dealloc_xrcd(xrcd); + return xrcd->device->ops.dealloc_xrcd(xrcd, udata); } EXPORT_SYMBOL(ib_dealloc_xrcd); @@ -2210,10 +2220,11 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, EXPORT_SYMBOL(ib_create_wq); /** - * ib_destroy_wq - Destroys the specified WQ. + * ib_destroy_wq - Destroys the specified user WQ. * @wq: The WQ to destroy. 
+ * @udata: Valid user data */ -int ib_destroy_wq(struct ib_wq *wq) +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { int err; struct ib_cq *cq = wq->cq; @@ -2222,7 +2233,7 @@ int ib_destroy_wq(struct ib_wq *wq) if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->ops.destroy_wq(wq); + err = wq->device->ops.destroy_wq(wq, udata); if (!err) { atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); @@ -2701,3 +2712,37 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num, netdev, params.param); } EXPORT_SYMBOL(rdma_init_netdev); + +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgsz) +{ + memset(biter, 0, sizeof(struct ib_block_iter)); + biter->__sg = sglist; + biter->__sg_nents = nents; + + /* Driver provides best block size to use */ + biter->__pg_bit = __fls(pgsz); +} +EXPORT_SYMBOL(__rdma_block_iter_start); + +bool __rdma_block_iter_next(struct ib_block_iter *biter) +{ + unsigned int block_offset; + + if (!biter->__sg_nents || !biter->__sg) + return false; + + biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; + block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); + biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; + + if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { + biter->__sg_advance = 0; + biter->__sg = sg_next(biter->__sg); + biter->__sg_nents--; + } + + return true; +} +EXPORT_SYMBOL(__rdma_block_iter_next); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index e4f31c1be8f7..77094be1b262 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ +obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index d25439c305f7..51e8234520a9 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. 
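The verbs.c hunks above introduce a generic DMA block iterator (__rdma_block_iter_start()/__rdma_block_iter_next(), driven through the rdma_for_each_block() helper), and the bnxt_re hunks below convert fill_umem_pbl_tbl() to it. A minimal sketch of that driver-side pattern — not taken from the patch itself; fill_pbl_sketch() and its pbl_tbl buffer are hypothetical, and the ib_umem is assumed to be already pinned and DMA mapped:

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>

static int fill_pbl_sketch(struct ib_umem *umem, u64 *pbl_tbl,
			   unsigned long page_size)
{
	struct ib_block_iter biter;
	u64 *cur = pbl_tbl;

	/*
	 * Walk the mapped SGL in aligned page_size blocks and record the
	 * DMA address of each block.  The core iterator splits large DMA
	 * segments for the driver, so no per-entry alignment checks are
	 * needed here.
	 */
	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
		*cur++ = rdma_block_iter_dma_address(&biter);

	return cur - pbl_tbl;	/* number of blocks written */
}

The same loop shape appears verbatim in the bnxt_re fill_umem_pbl_tbl() conversion further down, where page_size comes from ib_umem_find_best_pgsz().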
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 071b2fc38b0b..2c3685faa57a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -119,21 +119,6 @@ static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list, } /* Device */ -struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num) -{ - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct net_device *netdev = NULL; - - rcu_read_lock(); - if (rdev) - netdev = rdev->netdev; - if (netdev) - dev_hold(netdev); - - rcu_read_unlock(); - return netdev; -} - int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata) @@ -375,8 +360,9 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; - if ((attr->ndev) && is_vlan_dev(attr->ndev)) - vlan_id = vlan_dev_vlan_id(attr->ndev); + rc = rdma_read_gid_l2_fields(attr, &vlan_id, NULL); + if (rc) + return rc; rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, @@ -564,7 +550,7 @@ fail: } /* Protection Domains */ -void bnxt_re_dealloc_pd(struct ib_pd *ib_pd) +void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -576,14 +562,12 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd) &pd->qplib_pd); } -int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext, - struct ib_udata *udata) +int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_re_ucontext *ucntx = container_of(ucontext, - struct bnxt_re_ucontext, - ib_uctx); + struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context( + udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); int rc; @@ -635,20 +619,13 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) +void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; - int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_DESTROY_AH_SLEEPABLE)); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); - return rc; - } - kfree(ah); - return 0; + bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); } static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -669,26 +646,22 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { + struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; - struct bnxt_re_ah *ah; + struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); u8 nw_type; int rc; if (!(rdma_ah_get_ah_flags(ah_attr) & 
IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); - return ERR_PTR(-EINVAL); + return -EINVAL; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; @@ -718,7 +691,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); - goto fail; + return rc; } /* Write AVID to shared page. */ @@ -735,11 +708,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, spin_unlock_irqrestore(&uctx->sh_lock, flag); } - return &ah->ib_ah; - -fail: - kfree(ah); - return ERR_PTR(rc); + return 0; } int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) @@ -789,7 +758,7 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, } /* Queue Pairs */ -int bnxt_re_destroy_qp(struct ib_qp *ib_qp) +int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; @@ -812,13 +781,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah, false); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to destroy HW AH for shadow QP"); - return rc; - } + bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah, + false); bnxt_qplib_clean_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, @@ -895,8 +859,9 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sglist = umem->sg_head.sgl; - qplib_qp->sq.nmap = umem->nmap; + qplib_qp->sq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); + qplib_qp->sq.sg_info.nmap = umem->nmap; qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { @@ -907,8 +872,9 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sglist = umem->sg_head.sgl; - qplib_qp->rq.nmap = umem->nmap; + qplib_qp->rq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); + qplib_qp->rq.sg_info.nmap = umem->nmap; } qplib_qp->dpi = &cntx->dpi; @@ -916,8 +882,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, rqfail: ib_umem_release(qp->sumem); qp->sumem = NULL; - qplib_qp->sq.sglist = NULL; - qplib_qp->sq.nmap = 0; + memset(&qplib_qp->sq.sg_info, 0, sizeof(qplib_qp->sq.sg_info)); return PTR_ERR(umem); } @@ -1326,30 +1291,22 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -int bnxt_re_destroy_srq(struct ib_srq *ib_srq) +void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; struct bnxt_qplib_nq *nq = NULL; - int rc; if (qplib_srq->cq) nq = qplib_srq->cq->nq; - rc = bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); - if (rc) { - dev_err(rdev_to_dev(rdev), "Destroy HW SRQ failed!"); - return rc; - } - + bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); if (srq->umem) ib_umem_release(srq->umem); - kfree(srq); atomic_dec(&rdev->srq_count); if (nq) nq->budget--; - return 0; } static int 
bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, @@ -1374,22 +1331,25 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->nmap = umem->nmap; - qplib_srq->sglist = umem->sg_head.sgl; + qplib_srq->sg_info.sglist = umem->sg_head.sgl; + qplib_srq->sg_info.npages = ib_umem_num_pages(umem); + qplib_srq->sg_info.nmap = umem->nmap; qplib_srq->srq_handle = ureq.srq_handle; qplib_srq->dpi = &cntx->dpi; return 0; } -struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int bnxt_re_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { + struct ib_pd *ib_pd = ib_srq->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_srq *srq; + struct bnxt_re_srq *srq = + container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_qplib_nq *nq = NULL; int rc, entries; @@ -1404,11 +1364,6 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, goto exit; } - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - rc = -ENOMEM; - goto exit; - } srq->rdev = rdev; srq->qplib_srq.pd = &pd->qplib_pd; srq->qplib_srq.dpi = &rdev->dpi_privileged; @@ -1454,14 +1409,13 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, nq->budget++; atomic_inc(&rdev->srq_count); - return &srq->ib_srq; + return 0; fail: if (srq->umem) ib_umem_release(srq->umem); - kfree(srq); exit: - return ERR_PTR(rc); + return rc; } int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, @@ -1684,8 +1638,11 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->ah_attr.roce.dmac); sgid_attr = qp_attr->ah_attr.grh.sgid_attr; - memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, - ETH_ALEN); + rc = rdma_read_gid_l2_fields(sgid_attr, NULL, + &qp->qplib_qp.smac[0]); + if (rc) + return rc; + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: @@ -1904,8 +1861,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - if (is_vlan_dev(sgid_attr->ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); + rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL); + if (rc) + return rc; + /* Get network header type for this GID */ nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { @@ -2558,7 +2517,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, } /* Completion Queues */ -int bnxt_re_destroy_cq(struct ib_cq *ib_cq) +int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { int rc; struct bnxt_re_cq *cq; @@ -2587,7 +2546,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq) struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); @@ -2614,12 +2572,10 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; - if (context) { + if (udata) { struct bnxt_re_cq_req req; - struct bnxt_re_ucontext *uctx = container_of - (context, - struct bnxt_re_ucontext, - ib_uctx); + struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct bnxt_re_ucontext, ib_uctx); if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; goto 
fail; @@ -2632,8 +2588,9 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sghead = cq->umem->sg_head.sgl; - cq->qplib_cq.nmap = cq->umem->nmap; + cq->qplib_cq.sg_info.sglist = cq->umem->sg_head.sgl; + cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); + cq->qplib_cq.sg_info.nmap = cq->umem->nmap; cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); @@ -2645,8 +2602,6 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, } cq->qplib_cq.dpi = &rdev->dpi_privileged; - cq->qplib_cq.sghead = NULL; - cq->qplib_cq.nmap = 0; } /* * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a @@ -2671,7 +2626,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, atomic_inc(&rdev->cq_count); spin_lock_init(&cq->cq_lock); - if (context) { + if (udata) { struct bnxt_re_cq_resp resp; resp.cqid = cq->qplib_cq.id; @@ -2689,7 +2644,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, return &cq->ib_cq; c2fail: - if (context) + if (udata) ib_umem_release(cq->umem); fail: kfree(cq->cql); @@ -3381,7 +3336,7 @@ fail: return ERR_PTR(rc); } -int bnxt_re_dereg_mr(struct ib_mr *ib_mr) +int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); struct bnxt_re_dev *rdev = mr->rdev; @@ -3427,7 +3382,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -3552,17 +3507,12 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, int page_shift) { u64 *pbl_tbl = pbl_tbl_orig; - u64 paddr; - u64 page_mask = (1ULL << page_shift) - 1; - struct sg_dma_page_iter sg_iter; + u64 page_size = BIT_ULL(page_shift); + struct ib_block_iter biter; + + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size) + *pbl_tbl++ = rdma_block_iter_dma_address(&biter); - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - paddr = sg_page_iter_dma_address(&sg_iter); - if (pbl_tbl == pbl_tbl_orig) - *pbl_tbl++ = paddr & ~page_mask; - else if ((paddr & page_mask) == 0) - *pbl_tbl++ = paddr; - } return pbl_tbl - pbl_tbl_orig; } @@ -3624,7 +3574,9 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, goto free_umem; } - page_shift = PAGE_SHIFT; + page_shift = __ffs(ib_umem_find_best_pgsz(umem, + BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, + virt_addr)); if (!bnxt_re_page_size_ok(page_shift)) { dev_err(rdev_to_dev(rdev), "umem page size unsupported!"); @@ -3632,17 +3584,13 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, goto fail; } - if (!umem->hugetlb && length > BNXT_RE_MAX_MR_SIZE_LOW) { + if (page_shift == BNXT_RE_PAGE_SHIFT_4K && + length > BNXT_RE_MAX_MR_SIZE_LOW) { dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu", length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); rc = -EINVAL; goto fail; } - if (umem->hugetlb && length > BNXT_RE_PAGE_SIZE_2M) { - page_shift = BNXT_RE_PAGE_SHIFT_2M; - dev_warn(rdev_to_dev(rdev), "umem hugetlb set page_size %x", - 1 << page_shift); - } /* Map umem buf ptrs to the PBL */ umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift); @@ -3709,7 +3657,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata 
*udata) resp.chip_id0 = chip_met_rev_num; /* Future extension of chip info */ resp.chip_id1 = 0; - /*Temp, Use idr_alloc instead */ + /*Temp, Use xa_alloc instead */ resp.dev_id = rdev->en_dev->pdev->devfn; resp.max_qp = rdev->qplib_ctx.qpc_count; resp.pg_size = PAGE_SIZE; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e45465ed4eee..09a33049e42f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -63,15 +63,15 @@ struct bnxt_re_pd { }; struct bnxt_re_ah { - struct bnxt_re_dev *rdev; struct ib_ah ib_ah; + struct bnxt_re_dev *rdev; struct bnxt_qplib_ah qplib_ah; }; struct bnxt_re_srq { + struct ib_srq ib_srq; struct bnxt_re_dev *rdev; u32 srq_limit; - struct ib_srq ib_srq; struct bnxt_qplib_srq qplib_srq; struct ib_umem *umem; spinlock_t lock; /* protect srq */ @@ -142,8 +142,6 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num); - int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); @@ -163,24 +161,21 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); -int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); -void bnxt_re_dealloc_pd(struct ib_pd *pd); -struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); +int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); +void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); +int bnxt_re_create_srq(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int bnxt_re_destroy_srq(struct ib_srq *srq); +void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, @@ -190,16 +185,15 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int bnxt_re_destroy_qp(struct ib_qp *qp); +int bnxt_re_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, 
struct ib_udata *udata); -int bnxt_re_destroy_cq(struct ib_cq *cq); +int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); @@ -207,8 +201,8 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, - u32 max_num_sg); -int bnxt_re_dereg_mr(struct ib_mr *mr); + u32 max_num_sg, struct ib_udata *udata); +int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata); int bnxt_re_dealloc_mw(struct ib_mw *mw); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2bd24ac45ee4..814f959c7db9 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -617,7 +617,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .get_dma_mr = bnxt_re_get_dma_mr, .get_hw_stats = bnxt_re_ib_get_hw_stats, .get_link_layer = bnxt_re_get_link_layer, - .get_netdev = bnxt_re_get_netdev, .get_port_immutable = bnxt_re_get_port_immutable, .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, @@ -637,13 +636,16 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), + INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq), INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), }; static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; + int ret; /* ib device init */ ibdev->owner = THIS_MODULE; @@ -691,6 +693,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; ib_set_device_ops(ibdev, &bnxt_re_dev_ops); + ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); + if (ret) + return ret; + return ib_register_device(ibdev, "bnxt_re%d"); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 71c34d5b0ac0..958c1ff9c515 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -478,7 +478,7 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; hwq_type = bnxt_qplib_get_hwq_type(nq->res); - if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0, + if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, &nq->hwq.max_elements, BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, PAGE_SIZE, hwq_type)) @@ -507,7 +507,7 @@ static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) writeq(val, db); } -int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, +void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; @@ -521,14 +521,12 @@ int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, /* Configure the request */ req.srq_cid = cpu_to_le32(srq->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, 
- (void *)&resp, NULL, 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, (struct cmdq_base *)&req, + (struct creq_base *)&resp, NULL, 0); + kfree(srq->swq); if (rc) - return rc; - + return; bnxt_qplib_free_hwq(res->pdev, &srq->hwq); - kfree(srq->swq); - return 0; } int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, @@ -542,8 +540,8 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, int rc, idx; srq->hwq.max_elements = srq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, srq->sglist, - srq->nmap, &srq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, &srq->sg_info, + &srq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) @@ -742,7 +740,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, &sq->hwq.max_elements, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -781,7 +779,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { rq->hwq.max_elements = qp->rq.max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, &rq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -890,8 +888,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sizeof(struct sq_psn_search); } sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist, - sq->nmap, &sq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, &sq->sg_info, + &sq->hwq.max_elements, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, psn_sz, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -959,8 +957,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { rq->hwq.max_elements = rq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist, - rq->nmap, &rq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, + &rq->sg_info, + &rq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) @@ -1030,7 +1029,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, &xrrq->max_elements, BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE, 0, req_size, HWQ_TYPE_CTX); @@ -1046,7 +1045,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, &xrrq->max_elements, BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE, 0, req_size, HWQ_TYPE_CTX); @@ -1935,8 +1934,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) int rc; cq->hwq.max_elements = cq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, cq->sghead, - cq->nmap, &cq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, &cq->sg_info, + &cq->hwq.max_elements, BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h 
b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 3f618b5f1f06..99e0a13cbefa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -52,10 +52,9 @@ struct bnxt_qplib_srq { struct bnxt_qplib_cq *cq; struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; - struct scatterlist *sglist; int start_idx; int last_idx; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u16 eventq_hw_ring_id; spinlock_t lock; /* protect SRQE link list */ }; @@ -237,8 +236,7 @@ struct bnxt_qplib_swqe { struct bnxt_qplib_q { struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; - struct scatterlist *sglist; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u32 max_wqe; u16 q_full_delta; u16 max_sge; @@ -381,8 +379,7 @@ struct bnxt_qplib_cq { u32 cnq_hw_ring_id; struct bnxt_qplib_nq *nq; bool resize_in_progress; - struct scatterlist *sghead; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u64 cq_handle; #define CQ_RESIZE_WAIT_TIME_MS 500 @@ -521,8 +518,8 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); -int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, - struct bnxt_qplib_srq *srq); +void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, + struct bnxt_qplib_srq *srq); int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_swqe *wqe); int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index c6461e957078..48b04d2f175f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -569,7 +569,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; hwq_type = bnxt_qplib_get_hwq_type(rcfw->res); - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0, + if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, &rcfw->creq.max_elements, BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, hwq_type)) { @@ -584,7 +584,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, rcfw->cmdq.max_elements = rcfw->cmdq_depth; if (bnxt_qplib_alloc_init_hwq - (rcfw->pdev, &rcfw->cmdq, NULL, 0, + (rcfw->pdev, &rcfw->cmdq, NULL, &rcfw->cmdq.max_elements, BNXT_QPLIB_CMDQE_UNITS, 0, bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 0bc24f934829..37928b1111df 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -83,7 +83,8 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, } static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, - struct scatterlist *sghead, u32 pages, u32 pg_size) + struct scatterlist *sghead, u32 pages, + u32 nmaps, u32 pg_size) { struct sg_dma_page_iter sg_iter; bool is_umem = false; @@ -116,7 +117,7 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, } else { i = 0; is_umem = true; - for_each_sg_dma_page (sghead, &sg_iter, pages, 0) { + for_each_sg_dma_page(sghead, &sg_iter, nmaps, 0) { pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); pbl->pg_arr[i] = NULL; pbl->pg_count++; @@ -158,12 +159,13 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq) /* All HWQs are power of 2 in size */ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, 
struct bnxt_qplib_hwq *hwq, - struct scatterlist *sghead, int nmap, + struct bnxt_qplib_sg_info *sg_info, u32 *elements, u32 element_size, u32 aux, u32 pg_size, enum bnxt_qplib_hwq_type hwq_type) { - u32 pages, slots, size, aux_pages = 0, aux_size = 0; + u32 pages, maps, slots, size, aux_pages = 0, aux_size = 0; dma_addr_t *src_phys_ptr, **dst_virt_ptr; + struct scatterlist *sghead = NULL; int i, rc; hwq->level = PBL_LVL_MAX; @@ -177,6 +179,9 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, } size = roundup_pow_of_two(element_size); + if (sg_info) + sghead = sg_info->sglist; + if (!sghead) { hwq->is_user = false; pages = (slots * size) / pg_size + aux_pages; @@ -184,17 +189,20 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, pages++; if (!pages) return -EINVAL; + maps = 0; } else { hwq->is_user = true; - pages = nmap; + pages = sg_info->npages; + maps = sg_info->nmap; } /* Alloc the 1st memory block; can be a PDL/PTL/PBL */ if (sghead && (pages == MAX_PBL_LVL_0_PGS)) rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead, - pages, pg_size); + pages, maps, pg_size); else - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size); + rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, + 1, 0, pg_size); if (rc) goto fail; @@ -204,7 +212,8 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, if (pages > MAX_PBL_LVL_1_PGS) { /* 2 levels of indirection */ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL, - MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size); + MAX_PBL_LVL_1_PGS_FOR_LVL_2, + 0, pg_size); if (rc) goto fail; /* Fill in lvl0 PBL */ @@ -217,7 +226,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, hwq->level = PBL_LVL_1; rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead, - pages, pg_size); + pages, maps, pg_size); if (rc) goto fail; @@ -246,7 +255,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, /* 1 level of indirection */ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead, - pages, pg_size); + pages, maps, pg_size); if (rc) goto fail; /* Fill in lvl0 PBL */ @@ -339,7 +348,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* QPC Tables */ ctx->qpc_tbl.max_elements = ctx->qpc_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, &ctx->qpc_tbl.max_elements, BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -348,7 +357,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* MRW Tables */ ctx->mrw_tbl.max_elements = ctx->mrw_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, &ctx->mrw_tbl.max_elements, BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -357,7 +366,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* SRQ Tables */ ctx->srqc_tbl.max_elements = ctx->srqc_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, &ctx->srqc_tbl.max_elements, BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -366,7 +375,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* CQ Tables */ ctx->cq_tbl.max_elements = ctx->cq_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, &ctx->cq_tbl.max_elements, BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -375,7 +384,7 @@ int 
bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* TQM Buffer */ ctx->tqm_pde.max_elements = 512; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, &ctx->tqm_pde.max_elements, sizeof(u64), 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) @@ -386,7 +395,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, continue; ctx->tqm_tbl[i].max_elements = ctx->qpc_count * ctx->tqm_count[i]; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, &ctx->tqm_tbl[i].max_elements, 1, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) @@ -424,7 +433,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* TIM Buffer */ ctx->tim_tbl.max_elements = ctx->qpc_count * 16; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, &ctx->tim_tbl.max_elements, 1, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 32cebd0f1436..30c42c92fac7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -219,6 +219,12 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } +struct bnxt_qplib_sg_info { + struct scatterlist *sglist; + u32 nmap; + u32 npages; +}; + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -227,7 +233,7 @@ struct bnxt_qplib_dev_attr; void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, - struct scatterlist *sl, int nmap, u32 *elements, + struct bnxt_qplib_sg_info *sg_info, u32 *elements, u32 elements_per_page, u32 aux, u32 pg_size, enum bnxt_qplib_hwq_type hwq_type); void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index e9c53e406404..48793d3512ac 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -532,25 +532,21 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block) +void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; struct creq_destroy_ah_resp resp; u16 cmd_flags = 0; - int rc; /* Clean up the AH table in the device */ RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags); req.ah_cid = cpu_to_le32(ah->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, block); - if (rc) - return rc; - return 0; + bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, + block); } /* MRW */ @@ -684,7 +680,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, mr->hwq.max_elements = pages; /* Use system PAGE_SIZE */ - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, &mr->hwq.max_elements, PAGE_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -754,7 +750,7 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res, return -ENOMEM; frpl->hwq.max_elements = pages; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 
&frpl->hwq.max_elements, PAGE_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (!rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 39454b3f738d..0ec3b12b0bcd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -243,8 +243,8 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx); int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bool block); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block); +void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 83d2e19d31ae..53aa5c36247a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -64,7 +64,7 @@ enum t3_wr_flags { T3_SOLICITED_EVENT_FLAG = 0x04, T3_READ_FENCE_FLAG = 0x08, T3_LOCAL_FENCE_FLAG = 0x10 -} __attribute__ ((packed)); +} __packed; enum t3_wr_opcode { T3_WR_BP = FW_WROPCODE_RI_BYPASS, @@ -77,7 +77,7 @@ enum t3_wr_opcode { T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR -} __attribute__ ((packed)); +} __packed; enum t3_rdma_opcode { T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */ @@ -95,7 +95,7 @@ enum t3_rdma_opcode { T3_QP_MOD, T3_BYPASS, T3_RDMA_READ_REQ_WITH_INV, -} __attribute__ ((packed)); +} __packed; static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) { @@ -306,7 +306,7 @@ enum t3_mpa_attrs { uP_RI_MPA_TX_MARKER_ENABLE = 0x2, uP_RI_MPA_CRC_ENABLE = 0x4, uP_RI_MPA_IETF_ENABLE = 0x8 -} __attribute__ ((packed)); +} __packed; enum t3_qp_caps { uP_RI_QP_RDMA_READ_ENABLE = 0x01, @@ -314,7 +314,7 @@ enum t3_qp_caps { uP_RI_QP_BIND_ENABLE = 0x04, uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, uP_RI_QP_STAG0_ENABLE = 0x10 -} __attribute__ ((packed)); +} __packed; enum rdma_init_rtr_types { RTR_READ = 1, diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index fb03bc492ef7..56a8ab6210cf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -62,37 +62,30 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); -static int disable_qp_db(int id, void *p, void *data) -{ - struct iwch_qp *qhp = p; - - cxio_disable_wq_db(&qhp->wq); - return 0; -} - -static int enable_qp_db(int id, void *p, void *data) -{ - struct iwch_qp *qhp = p; - - if (data) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - return 0; -} - static void disable_dbs(struct iwch_dev *rnicp) { - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&rnicp->lock); + unsigned long index; + struct iwch_qp *qhp; + + xa_lock_irq(&rnicp->qps); + xa_for_each(&rnicp->qps, index, qhp) + cxio_disable_wq_db(&qhp->wq); + xa_unlock_irq(&rnicp->qps); } static void enable_dbs(struct iwch_dev *rnicp, int ring_db) { - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, enable_qp_db, - (void *)(unsigned long)ring_db); - spin_unlock_irq(&rnicp->lock); + unsigned long index; + struct iwch_qp *qhp; + + xa_lock_irq(&rnicp->qps); + xa_for_each(&rnicp->qps, index, qhp) { + if 
(ring_db) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, + qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + } + xa_unlock_irq(&rnicp->qps); } static void iwch_db_drop_task(struct work_struct *work) @@ -105,10 +98,9 @@ static void iwch_db_drop_task(struct work_struct *work) static void rnic_init(struct iwch_dev *rnicp) { pr_debug("%s iwch_dev %p\n", __func__, rnicp); - idr_init(&rnicp->cqidr); - idr_init(&rnicp->qpidr); - idr_init(&rnicp->mmidr); - spin_lock_init(&rnicp->lock); + xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; @@ -190,9 +182,9 @@ static void close_rnic_dev(struct t3cdev *tdev) list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); - idr_destroy(&dev->cqidr); - idr_destroy(&dev->qpidr); - idr_destroy(&dev->mmidr); + WARN_ON(!xa_empty(&dev->cqs)); + WARN_ON(!xa_empty(&dev->qps)); + WARN_ON(!xa_empty(&dev->mrs)); ib_dealloc_device(&dev->ibdev); break; } diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index c69bc4f52049..310a937bffcf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -35,7 +35,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/workqueue.h> #include <rdma/ib_verbs.h> @@ -106,10 +106,9 @@ struct iwch_dev { struct cxio_rdev rdev; u32 device_cap_flags; struct iwch_rnic_attributes attr; - struct idr cqidr; - struct idr qpidr; - struct idr mmidr; - spinlock_t lock; + struct xarray cqs; + struct xarray qps; + struct xarray mrs; struct list_head entry; struct delayed_work db_drop_task; }; @@ -136,40 +135,17 @@ static inline int t3a_device(const struct iwch_dev *rhp) static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) { - return idr_find(&rhp->cqidr, cqid); + return xa_load(&rhp->cqs, cqid); } static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) { - return idr_find(&rhp->qpidr, qpid); + return xa_load(&rhp->qps, qpid); } static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) { - return idr_find(&rhp->mmidr, mmid); -} - -static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock_irq(&rhp->lock); - - ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); - - spin_unlock_irq(&rhp->lock); - idr_preload_end(); - - return ret < 0 ? 
ret : 0; -} - -static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) -{ - spin_lock_irq(&rhp->lock); - idr_remove(idr, id); - spin_unlock_irq(&rhp->lock); + return xa_load(&rhp->mrs, mmid); } extern struct cxgb3_client t3c_client; diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 4a0c82a8fb60..9d356c1301c7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -48,14 +48,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct iwch_qp *qhp; unsigned long flag; - spin_lock(&rnicp->lock); - qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + xa_lock(&rnicp->qps); + qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); if (!qhp) { pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", __func__, CQE_STATUS(rsp_msg->cqe), CQE_QPID(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); return; } @@ -65,7 +65,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, __func__, qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); return; } @@ -76,7 +76,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); atomic_inc(&qhp->refcnt); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); if (qhp->attr.state == IWCH_QP_STATE_RTS) { attrs.next_state = IWCH_QP_STATE_TERMINATE; @@ -114,21 +114,21 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; - spin_lock(&rnicp->lock); + xa_lock(&rnicp->qps); chp = get_chp(rnicp, cqid); - qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); if (!chp || !qhp) { pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", cqid, CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); goto out; } iwch_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); /* * 1) completion of our sending a TERMINATE. 
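
Aside: the hunks above and below all apply the same idr-to-xarray conversion, so here is a minimal, hypothetical sketch of that pattern for readers unfamiliar with the xarray API. It is not code from the patch; struct demo_dev, struct demo_qp and the demo_* helpers are invented names. The point is that the xarray carries its own spinlock, which lets a driver drop its private lock field and the insert_handle()/remove_handle() style wrappers.

#include <linux/types.h>
#include <linux/printk.h>
#include <linux/xarray.h>

struct demo_qp {
	u32 qpid;
};

struct demo_dev {
	struct xarray qps;	/* replaces struct idr + spinlock_t */
};

static void demo_dev_init(struct demo_dev *dev)
{
	/* XA_FLAGS_LOCK_IRQ: entries may be looked up from (soft)irq context */
	xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ);
}

static int demo_add_qp(struct demo_dev *dev, struct demo_qp *qp)
{
	/* one call replaces the idr_preload()/idr_alloc()/idr_preload_end() dance */
	return xa_insert_irq(&dev->qps, qp->qpid, qp, GFP_KERNEL);
}

static struct demo_qp *demo_get_qp(struct demo_dev *dev, u32 qpid)
{
	return xa_load(&dev->qps, qpid);	/* lock-free lookup */
}

static void demo_del_qp(struct demo_dev *dev, u32 qpid)
{
	xa_erase_irq(&dev->qps, qpid);
}

static void demo_dump_qps(struct demo_dev *dev)
{
	struct demo_qp *qp;
	unsigned long index;

	/* iteration replaces idr_for_each() plus its callback function */
	xa_lock_irq(&dev->qps);
	xa_for_each(&dev->qps, index, qp)
		pr_debug("qp %u\n", qp->qpid);
	xa_unlock_irq(&dev->qps);
}
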
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 12886b1b4b10..ce0f2741821d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -49,7 +49,7 @@ static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); } int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 4accf7b3dcf2..3a481dfb1607 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -88,14 +88,14 @@ static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, return 0; } -static int iwch_destroy_cq(struct ib_cq *ib_cq) +static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct iwch_cq *chp; pr_debug("%s ib_cq %p\n", __func__, ib_cq); chp = to_iwch_cq(ib_cq); - remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); @@ -106,7 +106,6 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata) { int entries = attr->cqe; @@ -114,7 +113,6 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, struct iwch_cq *chp; struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; - struct iwch_ucontext *ucontext = NULL; static int warned; size_t resplen; @@ -127,8 +125,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, if (!chp) return ERR_PTR(-ENOMEM); - if (ib_context) { - ucontext = to_iwch_ucontext(ib_context); + if (udata) { if (!t3a_device(rhp)) { if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { kfree(chp); @@ -154,7 +151,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { + if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) { kfree(chp); return ERR_PTR(-ENOMEM); } @@ -164,18 +161,20 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { + if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); kfree(chp); return ERR_PTR(-ENOMEM); } - if (ucontext) { + if (udata) { struct iwch_mm_entry *mm; + struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct iwch_ucontext, ibucontext); mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) { - iwch_destroy_cq(&chp->ibcq); + iwch_destroy_cq(&chp->ibcq, udata); return ERR_PTR(-ENOMEM); } uresp.cqid = chp->cq.cqid; @@ -201,7 +200,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, } if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); - iwch_destroy_cq(&chp->ibcq); + iwch_destroy_cq(&chp->ibcq, udata); return ERR_PTR(-EFAULT); } insert_mmap(ucontext, mm); @@ -367,7 +366,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void 
iwch_deallocate_pd(struct ib_pd *pd) +static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -378,8 +377,7 @@ static void iwch_deallocate_pd(struct ib_pd *pd) cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); } -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct iwch_pd *php = to_iwch_pd(pd); struct ib_device *ibdev = pd->device; @@ -394,11 +392,11 @@ static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, php->pdid = pdid; php->rhp = rhp; - if (context) { + if (udata) { struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd); + iwch_deallocate_pd(&php->ibpd, udata); return -EFAULT; } } @@ -406,7 +404,7 @@ static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, return 0; } -static int iwch_dereg_mr(struct ib_mr *ib_mr) +static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_mr *mhp; @@ -421,7 +419,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); iwch_free_pbl(mhp); - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) @@ -539,7 +537,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = mhp->umem->nmap; + n = ib_umem_num_pages(mhp->umem); err = iwch_alloc_pbl(mhp, n); if (err) @@ -590,7 +588,7 @@ pbl_done: uresp.pbl_addr); if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { - iwch_dereg_mr(&mhp->ibmr); + iwch_dereg_mr(&mhp->ibmr, udata); err = -EFAULT; goto err; } @@ -636,7 +634,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp->attr.stag = stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); kfree(mhp); return ERR_PTR(-ENOMEM); @@ -655,15 +653,14 @@ static int iwch_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); kfree(mhp); return 0; } -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -701,7 +698,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, mhp->attr.state = 1; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid); + ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); if (ret) goto err3; @@ -742,7 +739,7 @@ static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); } -static int iwch_destroy_qp(struct ib_qp *ib_qp) +static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_qp *qhp; @@ -756,13 +753,13 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp) iwch_modify_qp(rhp, qhp, 
IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); + xa_erase_irq(&rhp->qps, qhp->wq.qpid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context) - : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, + ibucontext); cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); @@ -872,7 +869,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { + if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); kfree(qhp); @@ -885,14 +882,14 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); if (!mm1) { - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-ENOMEM); } mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) { kfree(mm1); - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-ENOMEM); } @@ -909,7 +906,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { kfree(mm1); kfree(mm2); - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-EFAULT); } mm1->key = uresp.key; @@ -1324,6 +1321,14 @@ static const struct ib_device_ops iwch_dev_ops = { .get_dma_mr = iwch_get_dma_mr, .get_hw_stats = iwch_get_mib, .get_port_immutable = iwch_port_immutable, + .iw_accept = iwch_accept_cr, + .iw_add_ref = iwch_qp_add_ref, + .iw_connect = iwch_connect, + .iw_create_listen = iwch_create_listen, + .iw_destroy_listen = iwch_destroy_listen, + .iw_get_qp = iwch_get_qp, + .iw_reject = iwch_reject_cr, + .iw_rem_ref = iwch_qp_rem_ref, .map_mr_sg = iwch_map_mr_sg, .mmap = iwch_mmap, .modify_qp = iwch_ib_modify_qp, @@ -1343,8 +1348,6 @@ static const struct ib_device_ops iwch_dev_ops = { int iwch_register_device(struct iwch_dev *dev) { - int ret; - pr_debug("%s iwch_dev %p\n", __func__, dev); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1382,34 +1385,18 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); - if (!dev->ibdev.iwcm) - return -ENOMEM; - - dev->ibdev.iwcm->connect = iwch_connect; - dev->ibdev.iwcm->accept = iwch_accept_cr; - dev->ibdev.iwcm->reject = iwch_reject_cr; - dev->ibdev.iwcm->create_listen = iwch_create_listen; - dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; - dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; - dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; - dev->ibdev.iwcm->get_qp = iwch_get_qp; - memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, + sizeof(dev->ibdev.iw_ifname)); dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - ret = ib_register_device(&dev->ibdev, "cxgb3_%d"); - if (ret) - kfree(dev->ibdev.iwcm); - return ret; + return ib_register_device(&dev->ibdev, "cxgb3_%d"); } void 
iwch_unregister_device(struct iwch_dev *dev) { pr_debug("%s iwch_dev %p\n", __func__, dev); ib_unregister_device(&dev->ibdev); - kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4d232bdf9e97..0f3b1193d5f8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -331,20 +331,23 @@ static void remove_ep_tid(struct c4iw_ep *ep) { unsigned long flags; - spin_lock_irqsave(&ep->com.dev->lock, flags); - _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); - if (idr_is_empty(&ep->com.dev->hwtid_idr)) + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + __xa_erase(&ep->com.dev->hwtids, ep->hwtid); + if (xa_empty(&ep->com.dev->hwtids)) wake_up(&ep->com.dev->wait); - spin_unlock_irqrestore(&ep->com.dev->lock, flags); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); } -static void insert_ep_tid(struct c4iw_ep *ep) +static int insert_ep_tid(struct c4iw_ep *ep) { unsigned long flags; + int err; + + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); - spin_lock_irqsave(&ep->com.dev->lock, flags); - _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0); - spin_unlock_irqrestore(&ep->com.dev->lock, flags); + return err; } /* @@ -355,11 +358,11 @@ static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) struct c4iw_ep *ep; unsigned long flags; - spin_lock_irqsave(&dev->lock, flags); - ep = idr_find(&dev->hwtid_idr, tid); + xa_lock_irqsave(&dev->hwtids, flags); + ep = xa_load(&dev->hwtids, tid); if (ep) c4iw_get_ep(&ep->com); - spin_unlock_irqrestore(&dev->lock, flags); + xa_unlock_irqrestore(&dev->hwtids, flags); return ep; } @@ -372,11 +375,11 @@ static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, struct c4iw_listen_ep *ep; unsigned long flags; - spin_lock_irqsave(&dev->lock, flags); - ep = idr_find(&dev->stid_idr, stid); + xa_lock_irqsave(&dev->stids, flags); + ep = xa_load(&dev->stids, stid); if (ep) c4iw_get_ep(&ep->com); - spin_unlock_irqrestore(&dev->lock, flags); + xa_unlock_irqrestore(&dev->stids, flags); return ep; } @@ -457,6 +460,8 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) skb_reset_transport_header(skb); } else { skb = alloc_skb(len, gfp); + if (!skb) + return NULL; } t4_set_arp_err_handler(skb, NULL, NULL); return skb; @@ -555,7 +560,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); } @@ -1235,7 +1240,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, tcp_opt); /* dealloc the atid */ - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); set_bit(ACT_ESTAB, &ep->com.history); @@ -2184,7 +2189,9 @@ static int c4iw_reconnect(struct c4iw_ep *ep) err = -ENOMEM; goto fail2; } - insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail2a; /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { @@ -2236,7 +2243,8 @@ 
static int c4iw_reconnect(struct c4iw_ep *ep) fail4: dst_release(ep->dst); fail3: - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail2a: cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: /* @@ -2319,8 +2327,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) (const u32 *) &sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, - atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2357,7 +2364,7 @@ fail: cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl), ep->com.local_addr.ss_family); - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2947,7 +2954,7 @@ out: (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); + xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, ep->com.local_addr.ss_family); dst_release(ep->dst); @@ -3342,7 +3349,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } - insert_handle(dev, &dev->atid_idr, ep, ep->atid); + err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail5; memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); @@ -3430,7 +3439,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail5: cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: skb_queue_purge(&ep->com.ep_skb_list); @@ -3553,7 +3563,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) err = -ENOMEM; goto fail2; } - insert_handle(dev, &dev->stid_idr, ep, ep->stid); + err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL); + if (err) + goto fail3; state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) @@ -3564,7 +3576,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = ep; goto out; } - remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + xa_erase_irq(&ep->com.dev->stids, ep->stid); +fail3: cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: @@ -3603,7 +3616,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + xa_erase_irq(&ep->com.dev->stids, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); done: @@ -3763,7 +3776,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(dev, &dev->atid_idr, atid); + xa_erase_irq(&dev->atids, atid); cxgb4_free_atid(dev->rdev.lldi.tids, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 1fd8798d91a7..52ce586621c6 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -30,6 +30,8 @@ * SOFTWARE. 
*/ +#include <rdma/uverbs_ioctl.h> + #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, @@ -968,7 +970,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } -int c4iw_destroy_cq(struct ib_cq *ib_cq) +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -976,12 +978,12 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) pr_debug("ib_cq %p\n", ib_cq); chp = to_c4iw_cq(ib_cq); - remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); - ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) - : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); @@ -992,7 +994,6 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata) { int entries = attr->cqe; @@ -1001,10 +1002,11 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct c4iw_cq *chp; struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; - struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; + struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct c4iw_ucontext, ibucontext); pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) @@ -1015,8 +1017,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); - if (ib_context) { - ucontext = to_c4iw_ucontext(ib_context); + if (udata) { if (udata->inlen < sizeof(ucmd)) ucontext->is_32b_cqe = 1; } @@ -1068,7 +1069,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, /* * memsize must be a multiple of the page size if its a user cq. */ - if (ucontext) + if (udata) memsize = roundup(memsize, PAGE_SIZE); chp->cq.size = hwentries; @@ -1088,7 +1089,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; @@ -1143,7 +1144,7 @@ err_free_mm2: err_free_mm: kfree(mm); err_remove_handle: - remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&rhp->cqs, chp->cq.cqid); err_destroy_cq: destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index c79cf63fb0bb..4c0d925c5ff5 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -81,14 +81,6 @@ struct c4iw_debugfs_data { int pos; }; -static int count_idrs(int id, void *p, void *data) -{ - int *countp = data; - - *countp = *countp + 1; - return 0; -} - static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -250,16 +242,11 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(int id, void *p, void *data) +static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) { - struct c4iw_qp *qp = p; - struct c4iw_debugfs_data *qpd = data; int space; int cc; - if (id != qp->wq.sq.qid) - return 0; - space = qpd->bufsize - qpd->pos - 1; if (space == 0) return 1; @@ -335,7 +322,9 @@ static int qp_release(struct inode *inode, struct file *file) static int qp_open(struct inode *inode, struct file *file) { + struct c4iw_qp *qp; struct c4iw_debugfs_data *qpd; + unsigned long index; int count = 1; qpd = kmalloc(sizeof *qpd, GFP_KERNEL); @@ -345,9 +334,12 @@ static int qp_open(struct inode *inode, struct file *file) qpd->devp = inode->i_private; qpd->pos = 0; - spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, count_idrs, &count); - spin_unlock_irq(&qpd->devp->lock); + /* + * No need to lock; we drop the lock to call vmalloc so it's racy + * anyway. Someone who cares should switch this over to seq_file + */ + xa_for_each(&qpd->devp->qps, index, qp) + count++; qpd->bufsize = count * 180; qpd->buf = vmalloc(qpd->bufsize); @@ -356,9 +348,10 @@ static int qp_open(struct inode *inode, struct file *file) return -ENOMEM; } - spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); - spin_unlock_irq(&qpd->devp->lock); + xa_lock_irq(&qpd->devp->qps); + xa_for_each(&qpd->devp->qps, index, qp) + dump_qp(qp, qpd); + xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; file->private_data = qpd; @@ -373,9 +366,8 @@ static const struct file_operations qp_debugfs_fops = { .llseek = default_llseek, }; -static int dump_stag(int id, void *p, void *data) +static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd) { - struct c4iw_debugfs_data *stagd = data; int space; int cc; struct fw_ri_tpte tpte; @@ -424,6 +416,8 @@ static int stag_release(struct inode *inode, struct file *file) static int stag_open(struct inode *inode, struct file *file) { struct c4iw_debugfs_data *stagd; + void *p; + unsigned long index; int ret = 0; int count = 1; @@ -435,9 +429,8 @@ static int stag_open(struct inode *inode, struct file *file) stagd->devp = inode->i_private; stagd->pos = 0; - spin_lock_irq(&stagd->devp->lock); - idr_for_each(&stagd->devp->mmidr, count_idrs, &count); - spin_unlock_irq(&stagd->devp->lock); + xa_for_each(&stagd->devp->mrs, index, p) + count++; stagd->bufsize = count * 256; stagd->buf = vmalloc(stagd->bufsize); @@ -446,9 +439,10 @@ static int stag_open(struct inode *inode, struct file *file) goto err1; } - spin_lock_irq(&stagd->devp->lock); - idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); - spin_unlock_irq(&stagd->devp->lock); + xa_lock_irq(&stagd->devp->mrs); + xa_for_each(&stagd->devp->mrs, index, p) + dump_stag(index, stagd); + xa_unlock_irq(&stagd->devp->mrs); stagd->buf[stagd->pos++] = 0; file->private_data = stagd; @@ -558,10 +552,8 @@ static const struct file_operations stats_debugfs_fops = { .write = 
stats_clear, }; -static int dump_ep(int id, void *p, void *data) +static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd) { - struct c4iw_ep *ep = p; - struct c4iw_debugfs_data *epd = data; int space; int cc; @@ -617,10 +609,9 @@ static int dump_ep(int id, void *p, void *data) return 0; } -static int dump_listen_ep(int id, void *p, void *data) +static +int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd) { - struct c4iw_listen_ep *ep = p; - struct c4iw_debugfs_data *epd = data; int space; int cc; @@ -674,6 +665,9 @@ static int ep_release(struct inode *inode, struct file *file) static int ep_open(struct inode *inode, struct file *file) { + struct c4iw_ep *ep; + struct c4iw_listen_ep *lep; + unsigned long index; struct c4iw_debugfs_data *epd; int ret = 0; int count = 1; @@ -686,11 +680,12 @@ static int ep_open(struct inode *inode, struct file *file) epd->devp = inode->i_private; epd->pos = 0; - spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); - idr_for_each(&epd->devp->atid_idr, count_idrs, &count); - idr_for_each(&epd->devp->stid_idr, count_idrs, &count); - spin_unlock_irq(&epd->devp->lock); + xa_for_each(&epd->devp->hwtids, index, ep) + count++; + xa_for_each(&epd->devp->atids, index, ep) + count++; + xa_for_each(&epd->devp->stids, index, lep) + count++; epd->bufsize = count * 240; epd->buf = vmalloc(epd->bufsize); @@ -699,11 +694,18 @@ static int ep_open(struct inode *inode, struct file *file) goto err1; } - spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); - idr_for_each(&epd->devp->atid_idr, dump_ep, epd); - idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); - spin_unlock_irq(&epd->devp->lock); + xa_lock_irq(&epd->devp->hwtids); + xa_for_each(&epd->devp->hwtids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->hwtids); + xa_lock_irq(&epd->devp->atids); + xa_for_each(&epd->devp->atids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->atids); + xa_lock_irq(&epd->devp->stids); + xa_for_each(&epd->devp->stids, index, lep) + dump_listen_ep(lep, epd); + xa_unlock_irq(&epd->devp->stids); file->private_data = epd; goto out; @@ -931,16 +933,12 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); - idr_destroy(&ctx->dev->cqidr); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); - idr_destroy(&ctx->dev->qpidr); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); - idr_destroy(&ctx->dev->mmidr); - wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); - idr_destroy(&ctx->dev->hwtid_idr); - idr_destroy(&ctx->dev->stid_idr); - idr_destroy(&ctx->dev->atid_idr); + WARN_ON(!xa_empty(&ctx->dev->cqs)); + WARN_ON(!xa_empty(&ctx->dev->qps)); + WARN_ON(!xa_empty(&ctx->dev->mrs)); + wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids)); + WARN_ON(!xa_empty(&ctx->dev->stids)); + WARN_ON(!xa_empty(&ctx->dev->atids)); if (ctx->dev->rdev.bar2_kva) iounmap(ctx->dev->rdev.bar2_kva); if (ctx->dev->rdev.oc_mw_kva) @@ -1044,13 +1042,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) return ERR_PTR(ret); } - idr_init(&devp->cqidr); - idr_init(&devp->qpidr); - idr_init(&devp->mmidr); - idr_init(&devp->hwtid_idr); - idr_init(&devp->stid_idr); - idr_init(&devp->atid_idr); - spin_lock_init(&devp->lock); + xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->mrs, 
XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); @@ -1265,34 +1262,21 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - static void stop_queues(struct uld_ctx *ctx) { - unsigned long flags; + struct c4iw_qp *qp; + unsigned long index, flags; - spin_lock_irqsave(&ctx->dev->lock, flags); + xa_lock_irqsave(&ctx->dev->qps, flags); ctx->dev->rdev.stats.db_state_transitions++; ctx->dev->db_state = STOPPED; - if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) - idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - else + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + xa_for_each(&ctx->dev->qps, index, qp) + t4_disable_wq_db(&qp->wq); + } else { ctx->dev->rdev.status_page->db_off = 1; - spin_unlock_irqrestore(&ctx->dev->lock, flags); -} - -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; + } + xa_unlock_irqrestore(&ctx->dev->qps, flags); } static void resume_rc_qp(struct c4iw_qp *qp) @@ -1322,18 +1306,21 @@ static void resume_a_chunk(struct uld_ctx *ctx) static void resume_queues(struct uld_ctx *ctx) { - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); if (ctx->dev->db_state != STOPPED) goto out; ctx->dev->db_state = FLOW_CONTROL; while (1) { if (list_empty(&ctx->dev->db_fc_list)) { + struct c4iw_qp *qp; + unsigned long index; + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); ctx->dev->db_state = NORMAL; ctx->dev->rdev.stats.db_state_transitions++; if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { - idr_for_each(&ctx->dev->qpidr, enable_qp_db, - NULL); + xa_for_each(&ctx->dev->qps, index, qp) + t4_enable_wq_db(&qp->wq); } else { ctx->dev->rdev.status_page->db_off = 0; } @@ -1345,12 +1332,12 @@ static void resume_queues(struct uld_ctx *ctx) resume_a_chunk(ctx); } if (!list_empty(&ctx->dev->db_fc_list)) { - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); if (DB_FC_RESUME_DELAY) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(DB_FC_RESUME_DELAY); } - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); if (ctx->dev->db_state != FLOW_CONTROL) break; } @@ -1359,7 +1346,7 @@ static void resume_queues(struct uld_ctx *ctx) out: if (ctx->dev->db_state != NORMAL) ctx->dev->rdev.stats.db_fc_interruptions++; - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); } struct qp_list { @@ -1367,23 +1354,6 @@ struct qp_list { struct c4iw_qp **qps; }; -static int add_and_ref_qp(int id, void *p, void *data) -{ - struct qp_list *qp_listp = data; - struct c4iw_qp *qp = p; - - c4iw_qp_add_ref(&qp->ibqp); - qp_listp->qps[qp_listp->idx++] = qp; - return 0; -} - -static int count_qps(int id, void *p, void *data) -{ - unsigned *countp = data; - (*countp)++; - return 0; -} - static void deref_qps(struct qp_list *qp_list) { int idx; @@ -1400,7 +1370,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) for (idx = 0; idx < qp_list->idx; idx++) { struct c4iw_qp *qp = qp_list->qps[idx]; - spin_lock_irq(&qp->rhp->lock); + xa_lock_irq(&qp->rhp->qps); spin_lock(&qp->lock); ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.sq.qid, @@ -1410,7 +1380,7 @@ 
static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); spin_unlock(&qp->lock); - spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); return; } qp->wq.sq.wq_pidx_inc = 0; @@ -1424,12 +1394,12 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); spin_unlock(&qp->lock); - spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); return; } qp->wq.rq.wq_pidx_inc = 0; spin_unlock(&qp->lock); - spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); /* Wait for the dbfifo to drain */ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { @@ -1441,6 +1411,8 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) static void recover_queues(struct uld_ctx *ctx) { + struct c4iw_qp *qp; + unsigned long index; int count = 0; struct qp_list qp_list; int ret; @@ -1458,22 +1430,26 @@ static void recover_queues(struct uld_ctx *ctx) } /* Count active queues so we can build a list of queues to recover */ - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); WARN_ON(ctx->dev->db_state != STOPPED); ctx->dev->db_state = RECOVERY; - idr_for_each(&ctx->dev->qpidr, count_qps, &count); + xa_for_each(&ctx->dev->qps, index, qp) + count++; qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC); if (!qp_list.qps) { - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); return; } qp_list.idx = 0; /* add and ref each qp so it doesn't get freed */ - idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list); + xa_for_each(&ctx->dev->qps, index, qp) { + c4iw_qp_add_ref(&qp->ibqp); + qp_list.qps[qp_list.idx++] = qp; + } - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); /* now traverse the list in a safe context to recover the db state*/ recover_lost_dbs(ctx, &qp_list); @@ -1482,10 +1458,10 @@ static void recover_queues(struct uld_ctx *ctx) deref_qps(&qp_list); kfree(qp_list.qps); - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); WARN_ON(ctx->dev->db_state != RECOVERY); ctx->dev->db_state = STOPPED; - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); } static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) 
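The cxgb4 hunks above all follow one conversion pattern: each per-object IDR plus the external dev->lock spinlock becomes an XArray whose built-in lock is taken IRQ-safe. The sketch below shows that pattern in isolation, assuming hypothetical demo_dev/demo_qp types and demo_* helpers that are not part of the commit; it only illustrates the XArray calls the patch itself uses (xa_init_flags, xa_insert_irq, xa_load, xa_for_each under xa_lock_irq, xa_erase_irq).

#include <linux/types.h>
#include <linux/xarray.h>

struct demo_qp {
	u32 qid;
};

struct demo_dev {
	struct xarray qps;	/* replaces "struct idr qpidr" plus the external spinlock_t lock */
};

static void demo_init(struct demo_dev *dev)
{
	/* Mark the XArray's internal lock as IRQ-safe, mirroring the old spin_lock_irq(&dev->lock) usage */
	xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ);
}

static int demo_add_qp(struct demo_dev *dev, struct demo_qp *qp)
{
	/* Store qp at index qid, failing if the id is already taken; replaces insert_handle()/idr_alloc(id, id + 1) */
	return xa_insert_irq(&dev->qps, qp->qid, qp, GFP_KERNEL);
}

static struct demo_qp *demo_find_qp(struct demo_dev *dev, u32 qid)
{
	/* Lookup by index, replacing idr_find(); callers needing atomicity wrap this in xa_lock_irq() */
	return xa_load(&dev->qps, qid);
}

static unsigned int demo_count_qps(struct demo_dev *dev)
{
	struct demo_qp *qp;
	unsigned long index;
	unsigned int count = 0;

	/* Walk every entry under the XArray lock, replacing idr_for_each() and its callback function */
	xa_lock_irq(&dev->qps);
	xa_for_each(&dev->qps, index, qp)
		count++;
	xa_unlock_irq(&dev->qps);

	return count;
}

static void demo_del_qp(struct demo_dev *dev, struct demo_qp *qp)
{
	/* Remove the id-to-object mapping, replacing remove_handle()/idr_remove() */
	xa_erase_irq(&dev->qps, qp->qid);
}

The main gain is that the XArray carries its own lock, so the driver-wide dev->lock and the iteration callbacks (count_idrs, dump_ep as an IDR callback, disable_qp_db/enable_qp_db, add_and_ref_qp, count_qps) can be dropped, which is exactly what the hunks above do.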
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 8741d23168f3..4cd877bd2f56 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -123,15 +123,15 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) struct c4iw_qp *qhp; u32 cqid; - spin_lock_irq(&dev->lock); - qhp = get_qhp(dev, CQE_QPID(err_cqe)); + xa_lock_irq(&dev->qps); + qhp = xa_load(&dev->qps, CQE_QPID(err_cqe)); if (!qhp) { pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); goto out; } @@ -146,13 +146,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); goto out; } c4iw_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); /* Bad incoming write */ if (RQ_TYPE(err_cqe) && @@ -225,11 +225,11 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) struct c4iw_cq *chp; unsigned long flag; - spin_lock_irqsave(&dev->lock, flag); - chp = get_chp(dev, qid); + xa_lock_irqsave(&dev->cqs, flag); + chp = xa_load(&dev->cqs, qid); if (chp) { atomic_inc(&chp->refcnt); - spin_unlock_irqrestore(&dev->lock, flag); + xa_unlock_irqrestore(&dev->cqs, flag); t4_clear_cq_armed(&chp->cq); spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); @@ -238,7 +238,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) wake_up(&chp->wait); } else { pr_debug("unknown cqid 0x%x\n", qid); - spin_unlock_irqrestore(&dev->lock, flag); + xa_unlock_irqrestore(&dev->cqs, flag); } return 0; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 5a5da41faef6..916ef982172e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -34,7 +34,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/completion.h> #include <linux/netdevice.h> #include <linux/sched/mm.h> @@ -315,16 +315,15 @@ struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; u32 device_cap_flags; - struct idr cqidr; - struct idr qpidr; - struct idr mmidr; - spinlock_t lock; + struct xarray cqs; + struct xarray qps; + struct xarray mrs; struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; - struct idr hwtid_idr; - struct idr atid_idr; - struct idr stid_idr; + struct xarray hwtids; + struct xarray atids; + struct xarray stids; struct list_head db_fc_list; u32 avail_ird; wait_queue_head_t wait; @@ -349,70 +348,12 @@ static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) { - return idr_find(&rhp->cqidr, cqid); + return xa_load(&rhp->cqs, cqid); } static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) { - return idr_find(&rhp->qpidr, qpid); -} - -static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) -{ - return idr_find(&rhp->mmidr, mmid); -} - -static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id, int lock) -{ - int ret; - - if (lock) { - 
idr_preload(GFP_KERNEL); - spin_lock_irq(&rhp->lock); - } - - ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); - - if (lock) { - spin_unlock_irq(&rhp->lock); - idr_preload_end(); - } - - return ret < 0 ? ret : 0; -} - -static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - return _insert_handle(rhp, idr, handle, id, 1); -} - -static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - return _insert_handle(rhp, idr, handle, id, 0); -} - -static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, - u32 id, int lock) -{ - if (lock) - spin_lock_irq(&rhp->lock); - idr_remove(idr, id); - if (lock) - spin_unlock_irq(&rhp->lock); -} - -static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) -{ - _remove_handle(rhp, idr, id, 1); -} - -static inline void remove_handle_nolock(struct c4iw_dev *rhp, - struct idr *idr, u32 id) -{ - _remove_handle(rhp, idr, id, 0); + return xa_load(&rhp->qps, qpid); } extern uint c4iw_max_read_depth; @@ -1038,9 +979,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); @@ -1051,21 +991,19 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); -int c4iw_dereg_mr(struct ib_mr *ib_mr); -int c4iw_destroy_cq(struct ib_cq *ib_cq); +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int c4iw_destroy_srq(struct ib_srq *ib_srq); -struct ib_srq *c4iw_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *attrs, - struct ib_udata *udata); -int c4iw_destroy_qp(struct ib_qp *ib_qp); +void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, + struct ib_udata *udata); +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 5baa31ab6366..811c0c8c5b16 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -395,7 +395,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); - return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); } static int register_mem(struct 
c4iw_dev *rhp, struct c4iw_pd *php, @@ -542,7 +542,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = mhp->umem->nmap; + n = ib_umem_num_pages(mhp->umem); err = alloc_pbl(mhp, n); if (err) goto err_umem_release; @@ -645,7 +645,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp->attr.stag = stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto dealloc_win; } @@ -673,7 +673,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp = to_c4iw_mw(mw); rhp = mhp->rhp; mmid = (mw->rkey) >> 8; - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, mhp->wr_waitp); kfree_skb(mhp->dereg_skb); @@ -683,9 +683,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) return 0; } -struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -740,7 +739,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, mhp->attr.state = 0; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto err_dereg; } @@ -786,7 +785,7 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); } -int c4iw_dereg_mr(struct ib_mr *ib_mr) +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_mr *mhp; @@ -797,7 +796,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); if (mhp->mpl) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); @@ -821,9 +820,9 @@ void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) struct c4iw_mr *mhp; unsigned long flags; - spin_lock_irqsave(&rhp->lock, flags); - mhp = get_mhp(rhp, rkey >> 8); + xa_lock_irqsave(&rhp->mrs, flags); + mhp = xa_load(&rhp->mrs, rkey >> 8); if (mhp) mhp->attr.state = 0; - spin_unlock_irqrestore(&rhp->lock, flags); + xa_unlock_irqrestore(&rhp->mrs, flags); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 507c54572cc9..74b795642fca 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void c4iw_deallocate_pd(struct ib_pd *pd) +static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -204,8 +204,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd) mutex_unlock(&rhp->rdev.stats.lock); } -static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_pd *php = to_c4iw_pd(pd); struct ib_device *ibdev = pd->device; @@ -220,11 +219,11 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, php->pdid = pdid; php->rhp = rhp; - if (context) { + if (udata) { struct c4iw_alloc_pd_resp uresp = {.pdid = 
php->pdid}; if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - c4iw_deallocate_pd(&php->ibpd); + c4iw_deallocate_pd(&php->ibpd, udata); return -EFAULT; } } @@ -483,24 +482,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -static struct net_device *get_netdev(struct ib_device *dev, u8 port) -{ - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); - struct c4iw_rdev *rdev = &c4iw_dev->rdev; - struct net_device *ndev; - - if (!port || port > rdev->lldi.nports) - return NULL; - - rcu_read_lock(); - ndev = rdev->lldi.ports[port - 1]; - if (ndev) - dev_hold(ndev); - rcu_read_unlock(); - - return ndev; -} - static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) { return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && @@ -528,8 +509,15 @@ static const struct ib_device_ops c4iw_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, - .get_netdev = get_netdev, .get_port_immutable = c4iw_port_immutable, + .iw_accept = c4iw_accept_cr, + .iw_add_ref = c4iw_qp_add_ref, + .iw_connect = c4iw_connect, + .iw_create_listen = c4iw_create_listen, + .iw_destroy_listen = c4iw_destroy_listen, + .iw_get_qp = c4iw_get_qp, + .iw_reject = c4iw_reject_cr, + .iw_rem_ref = c4iw_qp_rem_ref, .map_mr_sg = c4iw_map_mr_sg, .mmap = c4iw_mmap, .modify_qp = c4iw_ib_modify_qp, @@ -546,9 +534,24 @@ static const struct ib_device_ops c4iw_dev_ops = { .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; +static int set_netdevs(struct ib_device *ib_dev, struct c4iw_rdev *rdev) +{ + int ret; + int i; + + for (i = 0; i < rdev->lldi.nports; i++) { + ret = ib_device_set_netdev(ib_dev, rdev->lldi.ports[i], + i + 1); + if (ret) + return ret; + } + return 0; +} + void c4iw_register_device(struct work_struct *work) { int ret; @@ -593,33 +596,20 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); - if (!dev->ibdev.iwcm) { - ret = -ENOMEM; - goto err_dealloc_ctx; - } - - dev->ibdev.iwcm->connect = c4iw_connect; - dev->ibdev.iwcm->accept = c4iw_accept_cr; - dev->ibdev.iwcm->reject = c4iw_reject_cr; - dev->ibdev.iwcm->create_listen = c4iw_create_listen; - dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen; - dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; - dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; - dev->ibdev.iwcm->get_qp = c4iw_get_qp; - memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, - sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name, + sizeof(dev->ibdev.iw_ifname)); rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); + ret = set_netdevs(&dev->ibdev, &dev->rdev); + if (ret) + goto err_dealloc_ctx; ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); if (ret) - goto err_kfree_iwcm; + goto err_dealloc_ctx; return; -err_kfree_iwcm: - kfree(dev->ibdev.iwcm); err_dealloc_ctx: pr_err("%s - Failed registering iwarp device: %d\n", pci_name(ctx->lldi.pdev), ret); @@ -631,6 +621,5 @@ void c4iw_unregister_device(struct c4iw_dev *dev) { pr_debug("c4iw_dev %p\n", dev); 
ib_unregister_device(&dev->ibdev); - kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index d3a82839f5ea..e92b9544357a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -57,18 +57,18 @@ MODULE_PARM_DESC(db_coalescing_threshold, static int max_fr_immd = T4_MAX_FR_IMMD; module_param(max_fr_immd, int, 0644); -MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate"); static int alloc_ird(struct c4iw_dev *dev, u32 ird) { int ret = 0; - spin_lock_irq(&dev->lock); + xa_lock_irq(&dev->qps); if (ird <= dev->avail_ird) dev->avail_ird -= ird; else ret = -ENOMEM; - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); if (ret) dev_warn(&dev->rdev.lldi.pdev->dev, @@ -79,9 +79,9 @@ static int alloc_ird(struct c4iw_dev *dev, u32 ird) static void free_ird(struct c4iw_dev *dev, int ird) { - spin_lock_irq(&dev->lock); + xa_lock_irq(&dev->qps); dev->avail_ird += ird; - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); } static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) @@ -939,7 +939,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) { unsigned long flags; - spin_lock_irqsave(&qhp->rhp->lock, flags); + xa_lock_irqsave(&qhp->rhp->qps, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) t4_ring_sq_db(&qhp->wq, inc, NULL); @@ -948,7 +948,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) qhp->wq.sq.wq_pidx_inc += inc; } spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&qhp->rhp->lock, flags); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); return 0; } @@ -956,7 +956,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) { unsigned long flags; - spin_lock_irqsave(&qhp->rhp->lock, flags); + xa_lock_irqsave(&qhp->rhp->qps, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) t4_ring_rq_db(&qhp->wq, inc, NULL); @@ -965,7 +965,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) qhp->wq.rq.wq_pidx_inc += inc; } spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&qhp->rhp->lock, flags); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); return 0; } @@ -1976,10 +1976,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; + c4iw_get_ep(&ep->com); + disconnect = 1; if (!internal) { - c4iw_get_ep(&qhp->ep->com); terminate = 1; - disconnect = 1; } else { terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); @@ -2095,7 +2095,7 @@ out: return ret; } -int c4iw_destroy_qp(struct ib_qp *ib_qp) +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; @@ -2111,12 +2111,11 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); - - spin_lock_irq(&rhp->lock); + xa_lock_irq(&rhp->qps); + __xa_erase(&rhp->qps, qhp->wq.sq.qid); if (!list_empty(&qhp->db_fc_entry)) list_del_init(&qhp->db_fc_entry); - spin_unlock_irq(&rhp->lock); + xa_unlock_irq(&rhp->qps); free_ird(rhp, qhp->attr.max_ird); c4iw_qp_rem_ref(ib_qp); @@ -2234,7 +2233,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, kref_init(&qhp->kref); INIT_WORK(&qhp->free_work, free_qp_work); - ret = insert_handle(rhp, &rhp->qpidr, qhp, 
qhp->wq.sq.qid); + ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL); if (ret) goto err_destroy_qp; @@ -2370,7 +2369,7 @@ err_free_rq_key: err_free_sq_key: kfree(sq_key_mm); err_remove_handle: - remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + xa_erase_irq(&rhp->qps, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); @@ -2684,11 +2683,12 @@ void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) } } -struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, +int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, struct ib_udata *udata) { + struct ib_pd *pd = ib_srq->pd; struct c4iw_dev *rhp; - struct c4iw_srq *srq; + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); struct c4iw_pd *php; struct c4iw_create_srq_resp uresp; struct c4iw_ucontext *ucontext; @@ -2703,11 +2703,11 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rhp = php->rhp; if (!rhp->rdev.lldi.vr->srq.size) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); + return -E2BIG; if (attrs->attr.max_sge > T4_MAX_RECV_SGE) - return ERR_PTR(-E2BIG); + return -E2BIG; /* * SRQ RQT and RQ must be a power of 2 and at least 16 deep. @@ -2718,15 +2718,9 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!srq->wr_waitp) { - ret = -ENOMEM; - goto err_free_srq; - } + if (!srq->wr_waitp) + return -ENOMEM; srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); if (srq->idx < 0) { @@ -2760,7 +2754,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); if (ret) goto err_free_queue; @@ -2806,13 +2800,14 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, (unsigned long)srq->wq.memsize, attrs->attr.max_wr); spin_lock_init(&srq->lock); - return &srq->ibsrq; + return 0; + err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); err_remove_handle: - remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2822,12 +2817,10 @@ err_free_srq_idx: c4iw_free_srq_idx(&rhp->rdev, srq->idx); err_free_wr_wait: c4iw_put_wr_wait(srq->wr_waitp); -err_free_srq: - kfree(srq); - return ERR_PTR(ret); + return ret; } -int c4iw_destroy_srq(struct ib_srq *ibsrq) +void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; @@ -2838,13 +2831,11 @@ int c4iw_destroy_srq(struct ib_srq *ibsrq) pr_debug("%s id %d\n", __func__, srq->wq.qid); - remove_handle(rhp, &rhp->qpidr, srq->wq.qid); - ucontext = ibsrq->uobject ? - to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + xa_erase_irq(&rhp->qps, srq->wq.qid); + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_put_wr_wait(srq->wr_waitp); - kfree(srq); - return 0; } diff --git a/drivers/infiniband/hw/efa/Kconfig b/drivers/infiniband/hw/efa/Kconfig new file mode 100644 index 000000000000..457e18ba1d57 --- /dev/null +++ b/drivers/infiniband/hw/efa/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. +# +# Amazon fabric device configuration +# + +config INFINIBAND_EFA + tristate "Amazon Elastic Fabric Adapter (EFA) support" + depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN + depends on INFINIBAND_USER_ACCESS + help + This driver supports Amazon Elastic Fabric Adapter (EFA). + + To compile this driver as a module, choose M here. + The module will be called efa. diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile new file mode 100644 index 000000000000..6e83083af0bc --- /dev/null +++ b/drivers/infiniband/hw/efa/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. +# +# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver. +# + +obj-$(CONFIG_INFINIBAND_EFA) += efa.o + +efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h new file mode 100644 index 000000000000..9e3cc3239c13 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_H_ +#define _EFA_H_ + +#include <linux/bitops.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/sched.h> + +#include <rdma/efa-abi.h> +#include <rdma/ib_verbs.h> + +#include "efa_com_cmd.h" + +#define DRV_MODULE_NAME "efa" +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" + +#define EFA_IRQNAME_SIZE 40 + +/* 1 for AENQ + ADMIN */ +#define EFA_NUM_MSIX_VEC 1 +#define EFA_MGMNT_MSIX_VEC_IDX 0 + +struct efa_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[EFA_IRQNAME_SIZE]; +}; + +struct efa_sw_stats { + atomic64_t alloc_pd_err; + atomic64_t create_qp_err; + atomic64_t create_cq_err; + atomic64_t reg_mr_err; + atomic64_t alloc_ucontext_err; + atomic64_t create_ah_err; +}; + +/* Don't use anything other than atomic64 */ +struct efa_stats { + struct efa_sw_stats sw_stats; + atomic64_t keep_alive_rcvd; +}; + +struct efa_dev { + struct ib_device ibdev; + struct efa_com_dev edev; + struct pci_dev *pdev; + struct efa_com_get_device_attr_result dev_attr; + + u64 reg_bar_addr; + u64 reg_bar_len; + u64 mem_bar_addr; + u64 mem_bar_len; + u64 db_bar_addr; + u64 db_bar_len; + u8 addr[EFA_GID_SIZE]; + u32 mtu; + + int admin_msix_vector_idx; + struct efa_irq admin_irq; + + struct efa_stats stats; +}; + +struct efa_ucontext { + struct ib_ucontext ibucontext; + struct xarray mmap_xa; + u32 mmap_xa_page; + u16 uarn; +}; + +struct efa_pd { + struct ib_pd ibpd; + u16 pdn; +}; + +struct efa_mr { + struct ib_mr ibmr; + struct ib_umem *umem; +}; + +struct efa_cq { + struct ib_cq ibcq; + struct efa_ucontext *ucontext; + dma_addr_t dma_addr; + void *cpu_addr; + size_t size; + u16 cq_idx; +}; + +struct efa_qp { + struct ib_qp ibqp; + dma_addr_t rq_dma_addr; + void *rq_cpu_addr; + size_t rq_size; + 
enum ib_qp_state state; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +struct efa_ah { + struct ib_ah ibah; + u16 ah; + /* dest_addr */ + u8 id[EFA_GID_SIZE]; +}; + +int efa_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata); +int efa_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int efa_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); +struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +struct ib_cq *efa_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); +void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); +int efa_mmap(struct ib_ucontext *ibucontext, + struct vm_area_struct *vma); +int efa_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata); +void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata); +enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, + u8 port_num); + +#endif /* _EFA_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h new file mode 100644 index 000000000000..2be0469d545f --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -0,0 +1,794 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_ADMIN_CMDS_H_ +#define _EFA_ADMIN_CMDS_H_ + +#define EFA_ADMIN_API_VERSION_MAJOR 0 +#define EFA_ADMIN_API_VERSION_MINOR 1 + +/* EFA admin queue opcodes */ +enum efa_admin_aq_opcode { + EFA_ADMIN_CREATE_QP = 1, + EFA_ADMIN_MODIFY_QP = 2, + EFA_ADMIN_QUERY_QP = 3, + EFA_ADMIN_DESTROY_QP = 4, + EFA_ADMIN_CREATE_AH = 5, + EFA_ADMIN_DESTROY_AH = 6, + EFA_ADMIN_REG_MR = 7, + EFA_ADMIN_DEREG_MR = 8, + EFA_ADMIN_CREATE_CQ = 9, + EFA_ADMIN_DESTROY_CQ = 10, + EFA_ADMIN_GET_FEATURE = 11, + EFA_ADMIN_SET_FEATURE = 12, + EFA_ADMIN_GET_STATS = 13, + EFA_ADMIN_ALLOC_PD = 14, + EFA_ADMIN_DEALLOC_PD = 15, + EFA_ADMIN_ALLOC_UAR = 16, + EFA_ADMIN_DEALLOC_UAR = 17, + EFA_ADMIN_MAX_OPCODE = 17, +}; + +enum efa_admin_aq_feature_id { + EFA_ADMIN_DEVICE_ATTR = 1, + EFA_ADMIN_AENQ_CONFIG = 2, + EFA_ADMIN_NETWORK_ATTR = 3, + EFA_ADMIN_QUEUE_ATTR = 4, + EFA_ADMIN_HW_HINTS = 5, + EFA_ADMIN_FEATURES_OPCODE_NUM = 8, +}; + +/* QP transport type */ +enum efa_admin_qp_type { + /* Unreliable Datagram */ + EFA_ADMIN_QP_TYPE_UD = 1, + /* Scalable Reliable Datagram */ + EFA_ADMIN_QP_TYPE_SRD = 2, +}; + +/* QP state */ +enum efa_admin_qp_state { + EFA_ADMIN_QP_STATE_RESET = 0, + EFA_ADMIN_QP_STATE_INIT = 1, + EFA_ADMIN_QP_STATE_RTR = 2, + EFA_ADMIN_QP_STATE_RTS = 3, + EFA_ADMIN_QP_STATE_SQD = 4, + EFA_ADMIN_QP_STATE_SQE = 5, + EFA_ADMIN_QP_STATE_ERR = 6, +}; + +enum efa_admin_get_stats_type { + EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, +}; + +enum efa_admin_get_stats_scope { + EFA_ADMIN_GET_STATS_SCOPE_ALL = 0, + EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, +}; + +enum efa_admin_modify_qp_mask_bits { + EFA_ADMIN_QP_STATE_BIT = 0, + EFA_ADMIN_CUR_QP_STATE_BIT = 1, + EFA_ADMIN_QKEY_BIT = 2, + EFA_ADMIN_SQ_PSN_BIT = 3, + EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, +}; + +/* + * QP allocation sizes, converted by fabric QueuePair (QP) create command + * from QP capabilities. + */ +struct efa_admin_qp_alloc_size { + /* Send descriptor ring size in bytes */ + u32 send_queue_ring_size; + + /* Max number of WQEs that can be outstanding on send queue. */ + u32 send_queue_depth; + + /* + * Recv descriptor ring size in bytes, sufficient for user-provided + * number of WQEs + */ + u32 recv_queue_ring_size; + + /* Max number of WQEs that can be outstanding on recv queue */ + u32 recv_queue_depth; +}; + +struct efa_admin_create_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Protection Domain associated with this QP */ + u16 pd; + + /* QP type */ + u8 qp_type; + + /* + * 0 : sq_virt - If set, SQ ring base address is + * virtual (IOVA returned by MR registration) + * 1 : rq_virt - If set, RQ ring base address is + * virtual (IOVA returned by MR registration) + * 7:2 : reserved - MBZ + */ + u8 flags; + + /* + * Send queue (SQ) ring base physical address. This field is not + * used if this is a Low Latency Queue(LLQ). + */ + u64 sq_base_addr; + + /* Receive queue (RQ) ring base address. 
*/ + u64 rq_base_addr; + + /* Index of CQ to be associated with Send Queue completions */ + u32 send_cq_idx; + + /* Index of CQ to be associated with Recv Queue completions */ + u32 recv_cq_idx; + + /* + * Memory registration key for the SQ ring, used only when not in + * LLQ mode and base address is virtual + */ + u32 sq_l_key; + + /* + * Memory registration key for the RQ ring, used only when base + * address is virtual + */ + u32 rq_l_key; + + /* Requested QP allocation sizes */ + struct efa_admin_qp_alloc_size qp_alloc_size; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +struct efa_admin_create_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* Opaque handle to be used for consequent operations on the QP */ + u32 qp_handle; + + /* QP number in the given EFA virtual device */ + u16 qp_num; + + /* MBZ */ + u16 reserved; + + /* Index of sub-CQ for Send Queue completions */ + u16 send_sub_cq_idx; + + /* Index of sub-CQ for Receive Queue completions */ + u16 recv_sub_cq_idx; + + /* SQ doorbell address, as offset to PCIe DB BAR */ + u32 sq_db_offset; + + /* RQ doorbell address, as offset to PCIe DB BAR */ + u32 rq_db_offset; + + /* + * low latency send queue ring base address as an offset to PCIe + * MMIO LLQ_MEM BAR + */ + u32 llq_descriptors_offset; +}; + +struct efa_admin_modify_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* + * Mask indicating which fields should be updated see enum + * efa_admin_modify_qp_mask_bits + */ + u32 modify_mask; + + /* QP handle returned by create_qp command */ + u32 qp_handle; + + /* QP state */ + u32 qp_state; + + /* Override current QP state (before applying the transition) */ + u32 cur_qp_state; + + /* QKey */ + u32 qkey; + + /* SQ PSN */ + u32 sq_psn; + + /* Enable async notification when SQ is drained */ + u8 sq_drained_async_notify; + + /* MBZ */ + u8 reserved1; + + /* MBZ */ + u16 reserved2; +}; + +struct efa_admin_modify_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_query_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* QP handle returned by create_qp command */ + u32 qp_handle; +}; + +struct efa_admin_query_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* QP state */ + u32 qp_state; + + /* QKey */ + u32 qkey; + + /* SQ PSN */ + u32 sq_psn; + + /* Indicates that draining is in progress */ + u8 sq_draining; + + /* MBZ */ + u8 reserved1; + + /* MBZ */ + u16 reserved2; +}; + +struct efa_admin_destroy_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* QP handle returned by create_qp command */ + u32 qp_handle; +}; + +struct efa_admin_destroy_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * Create Address Handle command parameters. 
Must not be called more than + * once for the same destination + */ +struct efa_admin_create_ah_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Destination address in network byte order */ + u8 dest_addr[16]; + + /* PD number */ + u16 pd; + + u16 reserved; +}; + +struct efa_admin_create_ah_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* Target interface address handle (opaque) */ + u16 ah; + + u16 reserved; +}; + +struct efa_admin_destroy_ah_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Target interface address handle (opaque) */ + u16 ah; + + /* PD number */ + u16 pd; +}; + +struct efa_admin_destroy_ah_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * Registration of MemoryRegion, required for QP working with Virtual + * Addresses. In standard verbs semantics, region length is limited to 2GB + * space, but EFA offers larger MR support for large memory space, to ease + * on users working with very large datasets (i.e. full GPU memory mapping). + */ +struct efa_admin_reg_mr_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Protection Domain */ + u16 pd; + + /* MBZ */ + u16 reserved16_w1; + + /* Physical Buffer List, each element is page-aligned. */ + union { + /* + * Inline array of guest-physical page addresses of user + * memory pages (optimization for short region + * registrations) + */ + u64 inline_pbl_array[4]; + + /* points to PBL (direct or indirect, chained if needed) */ + struct efa_admin_ctrl_buff_info pbl; + } pbl; + + /* Memory region length, in bytes. */ + u64 mr_length; + + /* + * flags and page size + * 4:0 : phys_page_size_shift - page size is (1 << + * phys_page_size_shift). Page size is used for + * building the Virtual to Physical address mapping + * 6:5 : reserved - MBZ + * 7 : mem_addr_phy_mode_en - Enable bit for physical + * memory registration (no translation), can be used + * only by privileged clients. If set, PBL must + * contain a single entry. + */ + u8 flags; + + /* + * permissions + * 0 : local_write_enable - Write permissions: value + * of 1 needed for RQ buffers and for RDMA write + * 7:1 : reserved1 - remote access flags, etc + */ + u8 permissions; + + u16 reserved16_w5; + + /* number of pages in PBL (redundant, could be calculated) */ + u32 page_num; + + /* + * IO Virtual Address associated with this MR. If + * mem_addr_phy_mode_en is set, contains the physical address of + * the region. 
+ */ + u64 iova; +}; + +struct efa_admin_reg_mr_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* + * L_Key, to be used in conjunction with local buffer references in + * SQ and RQ WQE, or with virtual RQ/CQ rings + */ + u32 l_key; + + /* + * R_Key, to be used in RDMA messages to refer to remotely accessed + * memory region + */ + u32 r_key; +}; + +struct efa_admin_dereg_mr_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* L_Key, memory region's l_key */ + u32 l_key; +}; + +struct efa_admin_dereg_mr_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_create_cq_cmd { + struct efa_admin_aq_common_desc aq_common_desc; + + /* + * 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode (i.e. CQ events and MSI-X are + * generated), otherwise - polling + * 6 : virt - If set, ring base address is virtual + * (IOVA returned by MR registration) + * 7 : reserved6 + */ + u8 cq_caps_1; + + /* + * 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. + * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector_idx; + + /* + * CQ ring base address, virtual or physical depending on 'virt' + * flag + */ + struct efa_common_mem_addr cq_ba; + + /* + * Memory registration key for the ring, used only when base + * address is virtual + */ + u32 l_key; + + /* + * number of sub cqs - must be equal to sub_cqs_per_cq of queue + * attributes. + */ + u16 num_sub_cqs; + + /* UAR number */ + u16 uar; +}; + +struct efa_admin_create_cq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; +}; + +struct efa_admin_destroy_cq_cmd { + struct efa_admin_aq_common_desc aq_common_desc; + + u16 cq_idx; + + u16 reserved1; +}; + +struct efa_admin_destroy_cq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * EFA AQ Get Statistics command. 
Extended statistics are placed in control + * buffer pointed by AQ entry + */ +struct efa_admin_aq_get_stats_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct efa_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum efa_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum efa_admin_get_stats_scope */ + u8 scope; + + u16 scope_modifier; +}; + +struct efa_admin_basic_stats { + u64 tx_bytes; + + u64 tx_pkts; + + u64 rx_bytes; + + u64 rx_pkts; + + u64 rx_drops; +}; + +struct efa_admin_acq_get_stats_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + struct efa_admin_basic_stats basic_stats; +}; + +struct efa_admin_get_set_feature_common_desc { + /* + * 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in efa_admin_aq_feature_id */ + u8 feature_id; + + /* MBZ */ + u16 reserved16; +}; + +struct efa_admin_feature_device_attr_desc { + /* Bitmap of efa_admin_aq_feature_id */ + u64 supported_features; + + /* Bitmap of supported page sizes in MR registrations */ + u64 page_size_cap; + + u32 fw_version; + + u32 admin_api_version; + + u32 device_version; + + /* Bar used for SQ and RQ doorbells */ + u16 db_bar; + + /* Indicates how many bits are used physical address access */ + u8 phys_addr_width; + + /* Indicates how many bits are used virtual address access */ + u8 virt_addr_width; +}; + +struct efa_admin_feature_queue_attr_desc { + /* The maximum number of queue pairs supported */ + u32 max_qp; + + u32 max_sq_depth; + + /* max send wr used in inline-buf */ + u32 inline_buf_size; + + u32 max_rq_depth; + + /* The maximum number of completion queues supported per VF */ + u32 max_cq; + + u32 max_cq_depth; + + /* Number of sub-CQs to be created for each CQ */ + u16 sub_cqs_per_cq; + + u16 reserved; + + /* + * Maximum number of SGEs (buffs) allowed for a single send work + * queue element (WQE) + */ + u16 max_wr_send_sges; + + /* Maximum number of SGEs allowed for a single recv WQE */ + u16 max_wr_recv_sges; + + /* The maximum number of memory regions supported */ + u32 max_mr; + + /* The maximum number of pages can be registered */ + u32 max_mr_pages; + + /* The maximum number of protection domains supported */ + u32 max_pd; + + /* The maximum number of address handles supported */ + u32 max_ah; + + /* The maximum size of LLQ in bytes */ + u32 max_llq_size; +}; + +struct efa_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct efa_admin_feature_network_attr_desc { + /* Raw address data in network byte order */ + u8 addr[16]; + + u32 mtu; +}; + +/* + * When hint value is 0, hints capabilities are not supported or driver + * should use its own predefined value + */ +struct efa_admin_hw_hints { + /* value in ms */ + u16 mmio_read_timeout; + + /* value in ms */ + u16 driver_watchdog_timeout; + + /* value in ms */ + u16 admin_completion_timeout; + + /* poll interval in ms */ + u16 poll_interval; +}; + +struct efa_admin_get_feature_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + struct efa_admin_ctrl_buff_info control_buffer; + + struct efa_admin_get_set_feature_common_desc feature_common; + + u32 raw[11]; +}; + +struct efa_admin_get_feature_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct 
efa_admin_feature_device_attr_desc device_attr; + + struct efa_admin_feature_aenq_desc aenq; + + struct efa_admin_feature_network_attr_desc network_attr; + + struct efa_admin_feature_queue_attr_desc queue_attr; + + struct efa_admin_hw_hints hw_hints; + } u; +}; + +struct efa_admin_set_feature_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + struct efa_admin_ctrl_buff_info control_buffer; + + struct efa_admin_get_set_feature_common_desc feature_common; + + union { + u32 raw[11]; + + /* AENQ configuration */ + struct efa_admin_feature_aenq_desc aenq; + } u; +}; + +struct efa_admin_set_feature_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct efa_admin_alloc_pd_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; +}; + +struct efa_admin_alloc_pd_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* PD number */ + u16 pd; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_pd_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* PD number */ + u16 pd; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_pd_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_alloc_uar_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; +}; + +struct efa_admin_alloc_uar_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_uar_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_uar_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* asynchronous event notification groups */ +enum efa_admin_aenq_group { + EFA_ADMIN_FATAL_ERROR = 1, + EFA_ADMIN_WARNING = 2, + EFA_ADMIN_NOTIFICATION = 3, + EFA_ADMIN_KEEP_ALIVE = 4, + EFA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum efa_admin_aenq_notification_syndrom { + EFA_ADMIN_SUSPEND = 0, + EFA_ADMIN_RESUME = 1, + EFA_ADMIN_UPDATE_HINTS = 2, +}; + +struct efa_admin_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* create_qp_cmd */ +#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) +#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1 +#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) + +/* reg_mr_cmd */ +#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) +#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 +#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) +#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) + +/* create_cq_cmd */ +#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6 +#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +#endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h new file mode 100644 index 000000000000..c8e0c8b905be --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_ADMIN_H_ +#define _EFA_ADMIN_H_ + +enum efa_admin_aq_completion_status { + EFA_ADMIN_SUCCESS = 0, + EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + EFA_ADMIN_BAD_OPCODE = 2, + EFA_ADMIN_UNSUPPORTED_OPCODE = 3, + EFA_ADMIN_MALFORMED_REQUEST = 4, + /* Additional status is provided in ACQ entry extended_status */ + EFA_ADMIN_ILLEGAL_PARAMETER = 5, + EFA_ADMIN_UNKNOWN_ERROR = 6, + EFA_ADMIN_RESOURCE_BUSY = 7, +}; + +struct efa_admin_aq_common_desc { + /* + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in efa_admin_aq_opcode */ + u8 opcode; + + /* + * 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* + * used in efa_admin_aq_entry. Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct efa_admin_ctrl_buff_info { + u32 length; + + struct efa_common_mem_addr address; +}; + +struct efa_admin_aq_entry { + struct efa_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct efa_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct efa_admin_acq_common_desc { + /* + * command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* + * indicates to the driver which AQ entry has been consumed by the + * device and could be reused + */ + u16 sq_head_indx; +}; + +struct efa_admin_acq_entry { + struct efa_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct efa_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* + * 0 : phase + * 7:1 : reserved - MBZ + */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +struct efa_admin_aenq_entry { + struct efa_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +/* aq_common_desc */ +#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* acq_common_desc */ +#define EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_common_desc */ +#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +#endif /* _EFA_ADMIN_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c new file mode 100644 index 000000000000..a5c788741a04 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#include "efa_com.h" +#include "efa_regs_defs.h" + +#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */ + +#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */ +#define EFA_MMIO_READ_INVALID 0xffffffff + +#define EFA_POLL_INTERVAL_MS 100 /* msecs */ + +#define EFA_ASYNC_QUEUE_DEPTH 16 +#define EFA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_EFA_VER\ + ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \ + (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK)) + +#define EFA_CTRL_MAJOR 0 +#define EFA_CTRL_MINOR 0 +#define EFA_CTRL_SUB_MINOR 1 + +#define MIN_EFA_CTRL_VER \ + (((EFA_CTRL_MAJOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((EFA_CTRL_MINOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (EFA_CTRL_SUB_MINOR)) + +#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define EFA_REGS_ADMIN_INTR_MASK 1 + +enum efa_cmd_status { + EFA_CMD_SUBMITTED, + EFA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + EFA_CMD_ABORTED, +}; + +struct efa_comp_ctx { + struct completion wait_event; + struct efa_admin_acq_entry *user_cqe; + u32 comp_size; + enum efa_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + u8 occupied; +}; + +static const char *efa_com_cmd_str(u8 cmd) +{ +#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd + + switch (cmd) { + EFA_CMD_STR_CASE(CREATE_QP); + EFA_CMD_STR_CASE(MODIFY_QP); + EFA_CMD_STR_CASE(QUERY_QP); + EFA_CMD_STR_CASE(DESTROY_QP); + EFA_CMD_STR_CASE(CREATE_AH); + EFA_CMD_STR_CASE(DESTROY_AH); + EFA_CMD_STR_CASE(REG_MR); + EFA_CMD_STR_CASE(DEREG_MR); + EFA_CMD_STR_CASE(CREATE_CQ); + EFA_CMD_STR_CASE(DESTROY_CQ); + EFA_CMD_STR_CASE(GET_FEATURE); + EFA_CMD_STR_CASE(SET_FEATURE); + EFA_CMD_STR_CASE(GET_STATS); + EFA_CMD_STR_CASE(ALLOC_PD); + EFA_CMD_STR_CASE(DEALLOC_PD); + EFA_CMD_STR_CASE(ALLOC_UAR); + EFA_CMD_STR_CASE(DEALLOC_UAR); + default: return "unknown command opcode"; + } +#undef EFA_CMD_STR_CASE +} + +static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + struct efa_admin_mmio_req_read_less_resp *read_resp; + unsigned long exp_time; + u32 mmio_read_reg; + u32 err; + + read_resp = mmio_read->read_resp; + + spin_lock(&mmio_read->lock); + mmio_read->seq_num++; + + /* trash DMA req_id to identify when hardware is done */ + read_resp->req_id = mmio_read->seq_num + 0x9aL; + mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + EFA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + EFA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF); + + exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout); + do { + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) + break; + udelay(1); + } while (time_is_after_jiffies(exp_time)); + + if (read_resp->req_id != mmio_read->seq_num) { + ibdev_err(edev->efa_dev, + "Reading register timed out. 
expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + if (read_resp->reg_off != offset) { + ibdev_err(edev->efa_dev, + "Reading register failed: wrong offset provided\n"); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + err = read_resp->reg_val; +out: + spin_unlock(&mmio_read->lock); + return err; +} + +static int efa_com_admin_init_sq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size = aq->depth * sizeof(*sq->entries); + u32 addr_high; + u32 addr_low; + u32 aq_caps; + + sq->entries = + dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL); + if (!sq->entries) + return -ENOMEM; + + spin_lock_init(&sq->lock); + + sq->cc = 0; + sq->pc = 0; + sq->phase = 1; + + sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); + + aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct efa_admin_aq_entry) << + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_cq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_cq *cq = &aq->cq; + u16 size = aq->depth * sizeof(*cq->entries); + u32 addr_high; + u32 addr_low; + u32 acq_caps; + + cq->entries = + dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL); + if (!cq->entries) + return -ENOMEM; + + spin_lock_init(&cq->lock); + + cq->cc = 0; + cq->phase = 1; + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); + + acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct efa_admin_acq_entry) << + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + acq_caps |= (aq->msix_vector_idx << + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK; + + writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_aenq(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_aenq *aenq = &edev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + if (!aenq_handlers) { + ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries); + aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + if (!aenq->entries) + return -ENOMEM; + + aenq->aenq_handlers = aenq_handlers; + aenq->depth = EFA_ASYNC_QUEUE_DEPTH; + aenq->cc = 0; + aenq->phase = 1; + + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct efa_admin_aenq_entry) << + 
EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + aenq_caps |= (aenq->msix_vector_idx + << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK; + writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF); + + /* + * Init cons_db to mark that all entries in the queue + * are initially available + */ + writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF); + + return 0; +} + +/* ID to be used with efa_com_get_comp_ctx */ +static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq) +{ + u16 ctx_id; + + spin_lock(&aq->comp_ctx_lock); + ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next]; + aq->comp_ctx_pool_next++; + spin_unlock(&aq->comp_ctx_lock); + + return ctx_id; +} + +static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, + u16 ctx_id) +{ + spin_lock(&aq->comp_ctx_lock); + aq->comp_ctx_pool_next--; + aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id; + spin_unlock(&aq->comp_ctx_lock); +} + +static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx) +{ + u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id); + comp_ctx->occupied = 0; + efa_com_dealloc_ctx_id(aq, comp_id); +} + +static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq, + u16 command_id, bool capture) +{ + if (command_id >= aq->depth) { + ibdev_err(aq->efa_dev, + "command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, aq->depth); + return NULL; + } + + if (aq->comp_ctx[command_id].occupied && capture) { + ibdev_err(aq->efa_dev, "Completion context is occupied\n"); + return NULL; + } + + if (capture) { + aq->comp_ctx[command_id].occupied = 1; + ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n", + command_id); + } + + return &aq->comp_ctx[command_id]; +} + +static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + u16 queue_size_mask; + u16 ctx_id; + u16 pi; + + queue_size_mask = aq->depth - 1; + pi = aq->sq.pc & queue_size_mask; + + ctx_id = efa_com_alloc_ctx_id(aq); + + cmd->aq_common_descriptor.flags |= aq->sq.phase & + EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= ctx_id & + EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true); + if (!comp_ctx) { + efa_com_dealloc_ctx_id(aq, ctx_id); + return ERR_PTR(-EINVAL); + } + + comp_ctx->status = EFA_CMD_SUBMITTED; + comp_ctx->comp_size = comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + + aq->sq.pc++; + atomic64_inc(&aq->stats.submitted_cmd); + + if ((aq->sq.pc & queue_size_mask) == 0) + aq->sq.phase = !aq->sq.phase; + + /* barrier not needed in case of writel */ + writel(aq->sq.pc, aq->sq.db_addr); + + return comp_ctx; +} + +static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) +{ + size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool); + size_t size = aq->depth * sizeof(struct efa_comp_ctx); + struct efa_comp_ctx *comp_ctx; + u16 i; + + aq->comp_ctx = devm_kzalloc(aq->dmadev, size, 
GFP_KERNEL); + aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL); + if (!aq->comp_ctx || !aq->comp_ctx_pool) { + devm_kfree(aq->dmadev, aq->comp_ctx_pool); + devm_kfree(aq->dmadev, aq->comp_ctx); + return -ENOMEM; + } + + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + + aq->comp_ctx_pool[i] = i; + } + + spin_lock_init(&aq->comp_ctx_lock); + + aq->comp_ctx_pool_next = 0; + + return 0; +} + +static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + + spin_lock(&aq->sq.lock); + if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { + ibdev_err(aq->efa_dev, "Admin queue is closed\n"); + spin_unlock(&aq->sq.lock); + return ERR_PTR(-ENODEV); + } + + comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); + spin_unlock(&aq->sq.lock); + if (IS_ERR(comp_ctx)) + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return comp_ctx; +} + +static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, + struct efa_admin_acq_entry *cqe) +{ + struct efa_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false); + if (!comp_ctx) { + ibdev_err(aq->efa_dev, + "comp_ctx is NULL. Changing the admin queue running state\n"); + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + return; + } + + comp_ctx->status = EFA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + + if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + complete(&comp_ctx->wait_event); +} + +static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq) +{ + struct efa_admin_acq_entry *cqe; + u16 queue_size_mask; + u16 comp_num = 0; + u8 phase; + u16 ci; + + queue_size_mask = aq->depth - 1; + + ci = aq->cq.cc & queue_size_mask; + phase = aq->cq.phase; + + cqe = &aq->cq.entries[ci]; + + /* Go over all the completions */ + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & + EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + efa_com_handle_single_admin_completion(aq, cqe); + + ci++; + comp_num++; + if (ci == aq->depth) { + ci = 0; + phase = !phase; + } + + cqe = &aq->cq.entries[ci]; + } + + aq->cq.cc += comp_num; + aq->cq.phase = phase; + aq->sq.cc += comp_num; + atomic64_add(comp_num, &aq->stats.completed_cmd); +} + +static int efa_com_comp_status_to_errno(u8 comp_status) +{ + switch (comp_status) { + case EFA_ADMIN_SUCCESS: + return 0; + case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case EFA_ADMIN_UNSUPPORTED_OPCODE: + return -EOPNOTSUPP; + case EFA_ADMIN_BAD_OPCODE: + case EFA_ADMIN_MALFORMED_REQUEST: + case EFA_ADMIN_ILLEGAL_PARAMETER: + case EFA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + default: + return -EINVAL; + } +} + +static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long timeout; + unsigned long flags; + int err; + + timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); + + while (1) { + 
spin_lock_irqsave(&aq->cq.lock, flags); + efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + if (comp_ctx->status != EFA_CMD_SUBMITTED) + break; + + if (time_is_before_jiffies(timeout)) { + ibdev_err(aq->efa_dev, + "Wait for completion (polling) timeout\n"); + /* EFA didn't have any completion */ + atomic64_inc(&aq->stats.no_completion); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + msleep(aq->poll_interval); + } + + if (comp_ctx->status == EFA_CMD_ABORTED) { + ibdev_err(aq->efa_dev, "Command was aborted\n"); + atomic64_inc(&aq->stats.aborted_cmd); + err = -ENODEV; + goto out; + } + + WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED, + "Invalid completion status %d\n", comp_ctx->status); + + err = efa_com_comp_status_to_errno(comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long flags; + int err; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(aq->completion_timeout)); + + /* + * In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (comp_ctx->status == EFA_CMD_SUBMITTED) { + spin_lock_irqsave(&aq->cq.lock, flags); + efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + atomic64_inc(&aq->stats.no_completion); + + if (comp_ctx->status == EFA_CMD_COMPLETED) + ibdev_err(aq->efa_dev, + "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + else + ibdev_err(aq->efa_dev, + "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + err = efa_com_comp_status_to_errno(comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +/* + * There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called efa_com_handle_admin_completion + * to mark the completions. + */ +static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq); + + return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq); +} + +/** + * efa_com_cmd_exec - Execute admin command + * @aq: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @comp: command completion return entry. + * @comp_size: command completion size. + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copied into comp. + * + * @return - 0 on success, negative value on failure. 
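+ *
+ * Usage sketch (illustrative only; see efa_com_alloc_pd() in efa_com_cmd.c
+ * for a real caller, error handling omitted):
+ *
+ *   struct efa_admin_alloc_pd_cmd cmd = {};
+ *   struct efa_admin_alloc_pd_resp resp;
+ *   int err;
+ *
+ *   cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD;
+ *   err = efa_com_cmd_exec(&edev->aq,
+ *                          (struct efa_admin_aq_entry *)&cmd, sizeof(cmd),
+ *                          (struct efa_admin_acq_entry *)&resp, sizeof(resp));
+ *   (on success, resp.pd holds the newly allocated PD number)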
+ */ +int efa_com_cmd_exec(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size, + struct efa_admin_acq_entry *comp, + size_t comp_size) +{ + struct efa_comp_ctx *comp_ctx; + int err; + + might_sleep(); + + /* In case of queue FULL */ + down(&aq->avail_cmds); + + ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode); + comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); + if (IS_ERR(comp_ctx)) { + ibdev_err(aq->efa_dev, + "Failed to submit command %s (opcode %u) err %ld\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); + + up(&aq->avail_cmds); + return PTR_ERR(comp_ctx); + } + + err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); + if (err) + ibdev_err(aq->efa_dev, + "Failed to process command %s (opcode %u) comp_status %d err %d\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, + comp_ctx->comp_status, err); + + up(&aq->avail_cmds); + + return err; +} + +/** + * efa_com_abort_admin_commands - Abort all the outstanding admin commands. + * @edev: EFA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call efa_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +static void efa_com_abort_admin_commands(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_comp_ctx *comp_ctx; + unsigned long flags; + u16 i; + + spin_lock(&aq->sq.lock); + spin_lock_irqsave(&aq->cq.lock, flags); + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (!comp_ctx) + break; + + comp_ctx->status = EFA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } + spin_unlock_irqrestore(&aq->cq.lock, flags); + spin_unlock(&aq->sq.lock); +} + +/** + * efa_com_wait_for_abort_completion - Wait for admin commands abort. + * @edev: EFA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + int i; + + /* all mine */ + for (i = 0; i < aq->depth; i++) + down(&aq->avail_cmds); + + /* let it go */ + for (i = 0; i < aq->depth; i++) + up(&aq->avail_cmds); +} + +static void efa_com_admin_flush(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + efa_com_abort_admin_commands(edev); + efa_com_wait_for_abort_completion(edev); +} + +/** + * efa_com_admin_destroy - Destroy the admin and the async events queues. 
+ * @edev: EFA communication layer struct + */ +void efa_com_admin_destroy(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_com_admin_cq *cq = &aq->cq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size; + + efa_com_admin_flush(edev); + + devm_kfree(edev->dmadev, aq->comp_ctx_pool); + devm_kfree(edev->dmadev, aq->comp_ctx); + + size = aq->depth * sizeof(*sq->entries); + dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr); + + size = aq->depth * sizeof(*cq->entries); + dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr); + + size = aenq->depth * sizeof(*aenq->entries); + dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr); +} + +/** + * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @edev: EFA communication layer struct + * @polling: Enable/Disable polling mode + * + * Set the admin completion mode. + */ +void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling) +{ + u32 mask_value = 0; + + if (polling) + mask_value = EFA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF); + if (polling) + set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); + else + clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); +} + +static void efa_com_stats_init(struct efa_com_dev *edev) +{ + atomic64_t *s = (atomic64_t *)&edev->aq.stats; + int i; + + for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +/** + * efa_com_admin_init - Init the admin and the async queues + * @edev: EFA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. 
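+ *
+ * efa_com_mmio_reg_read_init() must have been called beforehand, as the
+ * device status and capability registers are read through the readless
+ * MMIO mechanism. A plausible bring-up order (illustrative sketch only,
+ * error handling omitted):
+ *
+ *   efa_com_mmio_reg_read_init(edev);
+ *   efa_com_validate_version(edev);
+ *   efa_com_get_dma_width(edev);
+ *   efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL);
+ *   efa_com_admin_init(edev, &aenq_handlers);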
+ */ +int efa_com_admin_init(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_admin_queue *aq = &edev->aq; + u32 timeout; + u32 dev_sts; + u32 cap; + int err; + + dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, abort com init %#x\n", dev_sts); + return -ENODEV; + } + + aq->depth = EFA_ADMIN_QUEUE_DEPTH; + + aq->dmadev = edev->dmadev; + aq->efa_dev = edev->efa_dev; + set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state); + + sema_init(&aq->avail_cmds, aq->depth); + + efa_com_stats_init(edev); + + err = efa_com_init_comp_ctxt(aq); + if (err) + return err; + + err = efa_com_admin_init_sq(edev); + if (err) + goto err_destroy_comp_ctxt; + + err = efa_com_admin_init_cq(edev); + if (err) + goto err_destroy_sq; + + efa_com_set_admin_polling_mode(edev, false); + + err = efa_com_admin_init_aenq(edev, aenq_handlers); + if (err) + goto err_destroy_cq; + + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + aq->completion_timeout = timeout * 100000; + else + aq->completion_timeout = ADMIN_CMD_TIMEOUT_US; + + aq->poll_interval = EFA_POLL_INTERVAL_MS; + + set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return 0; + +err_destroy_cq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries), + aq->cq.entries, aq->cq.dma_addr); +err_destroy_sq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries), + aq->sq.entries, aq->sq.dma_addr); +err_destroy_comp_ctxt: + devm_kfree(edev->dmadev, aq->comp_ctx); + + return err; +} + +/** + * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @edev: EFA communication layer struct + * + * This method goes over the admin completion queue and wakes up + * all the pending threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) +{ + unsigned long flags; + + spin_lock_irqsave(&edev->aq.cq.lock, flags); + efa_com_handle_admin_completion(&edev->aq); + spin_unlock_irqrestore(&edev->aq.cq.lock, flags); +} + +/* + * efa_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev, + u16 group) +{ + struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers; + + if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/** + * efa_com_aenq_intr_handler - AENQ interrupt handler + * @edev: EFA communication layer struct + * @data: Data of interrupt handler. + * + * Go over the async event notification queue and call the proper aenq handler. 
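+ *
+ * Handlers are selected by the event's group field through
+ * efa_com_get_specific_aenq_cb(); groups without a registered handler fall
+ * back to the unimplemented_handler callback. The AENQ consumer doorbell is
+ * rung only when at least one event was processed.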
+ */ +void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data) +{ + struct efa_admin_aenq_common_desc *aenq_common; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_admin_aenq_entry *aenq_e; + efa_aenq_handler handler_cb; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = aenq->cc & (aenq->depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[ci]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((READ_ONCE(aenq_common->flags) & + EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + /* Handle specific event*/ + handler_cb = efa_com_get_specific_aenq_cb(edev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + ci++; + processed++; + + if (ci == aenq->depth) { + ci = 0; + phase = !phase; + } + aenq_e = &aenq->entries[ci]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->cc += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* barrier not needed in case of writel */ + writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF); +} + +static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + u32 addr_high; + u32 addr_low; + + /* dma_addr_bits is unknown at this point */ + addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0); + addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0); + + writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF); + writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF); +} + +int efa_com_mmio_reg_read_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (!mmio_read->read_resp) + return -ENOMEM; + + efa_com_mmio_reg_read_resp_addr_init(edev); + + mmio_read->read_resp->req_id = 0; + mmio_read->seq_num = 0; + mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US; + + return 0; +} + +void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); +} + +int efa_com_validate_version(struct efa_com_dev *edev) +{ + u32 ctrl_ver_masked; + u32 ctrl_ver; + u32 ver; + + /* + * Make sure the EFA version and the controller version are at least + * as the driver expects + */ + ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF); + ctrl_ver = efa_com_reg_read32(edev, + EFA_REGS_CONTROLLER_VERSION_OFF); + + ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n", + (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & EFA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_EFA_VER) { + ibdev_err(edev->efa_dev, + "EFA version is lower than the minimal version the driver supports\n"); + return -EOPNOTSUPP; + } + + ibdev_dbg(edev->efa_dev, + "efa controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + 
EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_EFA_CTRL_VER) { + ibdev_err(edev->efa_dev, + "EFA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * efa_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @edev: EFA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int efa_com_get_dma_width(struct efa_com_dev *edev) +{ + u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + int width; + + width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width); + + if (width < 32 || width > 64) { + ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width); + return -EINVAL; + } + + edev->dma_addr_bits = width; + + return width; +} + +static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + + if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val); + msleep(EFA_POLL_INTERVAL_MS); + } + + return -ETIME; +} + +/** + * efa_com_dev_reset - Perform device FLR to the device. + * @edev: EFA communication layer struct + * @reset_reason: Specify what is the trigger for the reset in case of an error. + * + * @return - 0 on success, negative value on failure. 
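+ *
+ * The device must report ready in EFA_REGS_DEV_STS_OFF before the reset is
+ * issued. Since the reset clears the readless MMIO response address, that
+ * address is re-programmed right after the reset bit is written, and the
+ * function then polls for the reset-in-progress indication to assert and
+ * then deassert.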
+ */ +int efa_com_dev_reset(struct efa_com_dev *edev, + enum efa_regs_reset_reason_types reset_reason) +{ + u32 stat, timeout, cap, reset_val; + int err; + + stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + + if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (!timeout) { + ibdev_err(edev->efa_dev, "Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK; + reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) & + EFA_REGS_DEV_CTL_RESET_REASON_MASK; + writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + + /* reset clears the mmio readless address, restore it */ + efa_com_mmio_reg_read_resp_addr_init(edev); + + err = wait_for_reset_state(edev, timeout, + EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n"); + return err; + } + + /* reset done */ + writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + err = wait_for_reset_state(edev, timeout, 0); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n"); + return err; + } + + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + edev->aq.completion_timeout = timeout * 100000; + else + edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US; + + return 0; +} diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h new file mode 100644 index 000000000000..84d96724a74b --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_COM_H_ +#define _EFA_COM_H_ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/semaphore.h> +#include <linux/sched.h> + +#include <rdma/ib_verbs.h> + +#include "efa_common_defs.h" +#include "efa_admin_defs.h" +#include "efa_admin_cmds_defs.h" +#include "efa_regs_defs.h" + +#define EFA_MAX_HANDLERS 256 + +struct efa_com_admin_cq { + struct efa_admin_acq_entry *entries; + dma_addr_t dma_addr; + spinlock_t lock; /* Protects ACQ */ + + u16 cc; /* consumer counter */ + u8 phase; +}; + +struct efa_com_admin_sq { + struct efa_admin_aq_entry *entries; + dma_addr_t dma_addr; + spinlock_t lock; /* Protects ASQ */ + + u32 __iomem *db_addr; + + u16 cc; /* consumer counter */ + u16 pc; /* producer counter */ + u8 phase; + +}; + +/* Don't use anything other than atomic64 */ +struct efa_com_stats_admin { + atomic64_t aborted_cmd; + atomic64_t submitted_cmd; + atomic64_t completed_cmd; + atomic64_t no_completion; +}; + +enum { + EFA_AQ_STATE_RUNNING_BIT = 0, + EFA_AQ_STATE_POLLING_BIT = 1, +}; + +struct efa_com_admin_queue { + void *dmadev; + void *efa_dev; + struct efa_comp_ctx *comp_ctx; + u32 completion_timeout; /* usecs */ + u16 poll_interval; /* msecs */ + u16 depth; + struct efa_com_admin_cq cq; + struct efa_com_admin_sq sq; + u16 msix_vector_idx; + + unsigned long state; + + /* Count the number of available admin commands */ + struct semaphore avail_cmds; + + struct efa_com_stats_admin stats; + + spinlock_t comp_ctx_lock; /* Protects completion context pool */ + u32 *comp_ctx_pool; + u16 comp_ctx_pool_next; +}; + +struct efa_aenq_handlers; + +struct efa_com_aenq { + struct efa_admin_aenq_entry *entries; + struct efa_aenq_handlers *aenq_handlers; + dma_addr_t dma_addr; + u32 cc; /* consumer counter */ + u16 msix_vector_idx; + u16 depth; + u8 phase; +}; + +struct efa_com_mmio_read { + struct efa_admin_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + u16 mmio_read_timeout; /* usecs */ + /* serializes mmio reads */ + spinlock_t lock; +}; + +struct efa_com_dev { + struct efa_com_admin_queue aq; + struct efa_com_aenq aenq; + u8 __iomem *reg_bar; + void *dmadev; + void *efa_dev; + u32 supported_features; + u32 dma_addr_bits; + + struct efa_com_mmio_read mmio_read; +}; + +typedef void (*efa_aenq_handler)(void *data, + struct efa_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. 
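A device driver typically fills handlers[] with one callback per event group it handles and points unimplemented_handler at a fallback, as aenq_handlers in efa_main.c does for EFA_ADMIN_KEEP_ALIVE.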
Indexed by AENQ event group */ +struct efa_aenq_handlers { + efa_aenq_handler handlers[EFA_MAX_HANDLERS]; + efa_aenq_handler unimplemented_handler; +}; + +int efa_com_admin_init(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers); +void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_dev_reset(struct efa_com_dev *edev, + enum efa_regs_reset_reason_types reset_reason); +void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); +void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev); +int efa_com_mmio_reg_read_init(struct efa_com_dev *edev); +void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev); + +int efa_com_validate_version(struct efa_com_dev *edev); +int efa_com_get_dma_width(struct efa_com_dev *edev); + +int efa_com_cmd_exec(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size, + struct efa_admin_acq_entry *comp, + size_t comp_size); +void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); + +#endif /* _EFA_COM_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c new file mode 100644 index 000000000000..14227725521c --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -0,0 +1,692 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include "efa.h" +#include "efa_com.h" +#include "efa_com_cmd.h" + +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + +int efa_com_create_qp(struct efa_com_dev *edev, + struct efa_com_create_qp_params *params, + struct efa_com_create_qp_result *res) +{ + struct efa_admin_create_qp_cmd create_qp_cmd = {}; + struct efa_admin_create_qp_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + create_qp_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_QP; + + create_qp_cmd.pd = params->pd; + create_qp_cmd.qp_type = params->qp_type; + create_qp_cmd.rq_base_addr = params->rq_base_addr; + create_qp_cmd.send_cq_idx = params->send_cq_idx; + create_qp_cmd.recv_cq_idx = params->recv_cq_idx; + create_qp_cmd.qp_alloc_size.send_queue_ring_size = + params->sq_ring_size_in_bytes; + create_qp_cmd.qp_alloc_size.send_queue_depth = + params->sq_depth; + create_qp_cmd.qp_alloc_size.recv_queue_ring_size = + params->rq_ring_size_in_bytes; + create_qp_cmd.qp_alloc_size.recv_queue_depth = + params->rq_depth; + create_qp_cmd.uar = params->uarn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&create_qp_cmd, + sizeof(create_qp_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err); + return err; + } + + res->qp_handle = cmd_completion.qp_handle; + res->qp_num = cmd_completion.qp_num; + res->sq_db_offset = cmd_completion.sq_db_offset; + res->rq_db_offset = cmd_completion.rq_db_offset; + res->llq_descriptors_offset = cmd_completion.llq_descriptors_offset; + res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx; + res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx; + + return err; +} + +int efa_com_modify_qp(struct efa_com_dev *edev, + struct efa_com_modify_qp_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_modify_qp_cmd cmd = {}; + struct efa_admin_modify_qp_resp resp; + int err; + + cmd.aq_common_desc.opcode = EFA_ADMIN_MODIFY_QP; + cmd.modify_mask = 
params->modify_mask; + cmd.qp_handle = params->qp_handle; + cmd.qp_state = params->qp_state; + cmd.cur_qp_state = params->cur_qp_state; + cmd.qkey = params->qkey; + cmd.sq_psn = params->sq_psn; + cmd.sq_drained_async_notify = params->sq_drained_async_notify; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to modify qp-%u modify_mask[%#x] [%d]\n", + cmd.qp_handle, cmd.modify_mask, err); + return err; + } + + return 0; +} + +int efa_com_query_qp(struct efa_com_dev *edev, + struct efa_com_query_qp_params *params, + struct efa_com_query_qp_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_query_qp_cmd cmd = {}; + struct efa_admin_query_qp_resp resp; + int err; + + cmd.aq_common_desc.opcode = EFA_ADMIN_QUERY_QP; + cmd.qp_handle = params->qp_handle; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n", + cmd.qp_handle, err); + return err; + } + + result->qp_state = resp.qp_state; + result->qkey = resp.qkey; + result->sq_draining = resp.sq_draining; + result->sq_psn = resp.sq_psn; + + return 0; +} + +int efa_com_destroy_qp(struct efa_com_dev *edev, + struct efa_com_destroy_qp_params *params) +{ + struct efa_admin_destroy_qp_resp cmd_completion; + struct efa_admin_destroy_qp_cmd qp_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + qp_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_QP; + qp_cmd.qp_handle = params->qp_handle; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&qp_cmd, + sizeof(qp_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n", + qp_cmd.qp_handle, err); + + return 0; +} + +int efa_com_create_cq(struct efa_com_dev *edev, + struct efa_com_create_cq_params *params, + struct efa_com_create_cq_result *result) +{ + struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_cmd create_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ; + create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) & + EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_depth = params->cq_depth; + create_cmd.num_sub_cqs = params->num_sub_cqs; + create_cmd.uar = params->uarn; + + efa_com_set_dma_addr(params->dma_addr, + &create_cmd.cq_ba.mem_addr_high, + &create_cmd.cq_ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err); + return err; + } + + result->cq_idx = cmd_completion.cq_idx; + result->actual_depth = params->cq_depth; + + return err; +} + +int efa_com_destroy_cq(struct efa_com_dev *edev, + struct efa_com_destroy_cq_params *params) +{ + struct efa_admin_destroy_cq_cmd destroy_cmd = {}; + struct efa_admin_destroy_cq_resp destroy_resp; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + destroy_cmd.cq_idx = params->cq_idx; + destroy_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_CQ; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct efa_admin_acq_entry *)&destroy_resp, + 
sizeof(destroy_resp)); + + if (err) + ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n", + params->cq_idx, err); + + return 0; +} + +int efa_com_register_mr(struct efa_com_dev *edev, + struct efa_com_reg_mr_params *params, + struct efa_com_reg_mr_result *result) +{ + struct efa_admin_reg_mr_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_reg_mr_cmd mr_cmd = {}; + int err; + + mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR; + mr_cmd.pd = params->pd; + mr_cmd.mr_length = params->mr_length_in_bytes; + mr_cmd.flags |= params->page_shift & + EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; + mr_cmd.iova = params->iova; + mr_cmd.permissions |= params->permissions & + EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + + if (params->inline_pbl) { + memcpy(mr_cmd.pbl.inline_pbl_array, + params->pbl.inline_pbl_array, + sizeof(mr_cmd.pbl.inline_pbl_array)); + } else { + mr_cmd.pbl.pbl.length = params->pbl.pbl.length; + mr_cmd.pbl.pbl.address.mem_addr_low = + params->pbl.pbl.address.mem_addr_low; + mr_cmd.pbl.pbl.address.mem_addr_high = + params->pbl.pbl.address.mem_addr_high; + mr_cmd.aq_common_desc.flags |= + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK; + if (params->indirect) + mr_cmd.aq_common_desc.flags |= + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + } + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&mr_cmd, + sizeof(mr_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err); + return err; + } + + result->l_key = cmd_completion.l_key; + result->r_key = cmd_completion.r_key; + + return 0; +} + +int efa_com_dereg_mr(struct efa_com_dev *edev, + struct efa_com_dereg_mr_params *params) +{ + struct efa_admin_dereg_mr_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dereg_mr_cmd mr_cmd = {}; + int err; + + mr_cmd.aq_common_desc.opcode = EFA_ADMIN_DEREG_MR; + mr_cmd.l_key = params->l_key; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&mr_cmd, + sizeof(mr_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, + "Failed to de-register mr(lkey-%u) [%d]\n", + mr_cmd.l_key, err); + + return 0; +} + +int efa_com_create_ah(struct efa_com_dev *edev, + struct efa_com_create_ah_params *params, + struct efa_com_create_ah_result *result) +{ + struct efa_admin_create_ah_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_ah_cmd ah_cmd = {}; + int err; + + ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH; + + memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr)); + ah_cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&ah_cmd, + sizeof(ah_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err); + return err; + } + + result->ah = cmd_completion.ah; + + return 0; +} + +int efa_com_destroy_ah(struct efa_com_dev *edev, + struct efa_com_destroy_ah_params *params) +{ + struct efa_admin_destroy_ah_resp cmd_completion; + struct efa_admin_destroy_ah_cmd ah_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH; + ah_cmd.ah = params->ah; + ah_cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&ah_cmd, + sizeof(ah_cmd), + (struct efa_admin_acq_entry 
*)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n", + ah_cmd.ah, ah_cmd.pd, err); + + return 0; +} + +static bool +efa_com_check_supported_feature_id(struct efa_com_dev *edev, + enum efa_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if (feature_id != EFA_ADMIN_DEVICE_ATTR && + !(edev->supported_features & feature_mask)) + return false; + + return true; +} + +static int efa_com_get_feature_ex(struct efa_com_dev *edev, + struct efa_admin_get_feature_resp *get_resp, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct efa_admin_get_feature_cmd get_cmd = {}; + struct efa_com_admin_queue *aq; + int err; + + if (!efa_com_check_supported_feature_id(edev, feature_id)) { + ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", + feature_id); + return -EOPNOTSUPP; + } + + aq = &edev->aq; + + get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + + + efa_com_set_dma_addr(control_buf_dma_addr, + &get_cmd.control_buffer.address.mem_addr_high, + &get_cmd.control_buffer.address.mem_addr_low); + + get_cmd.control_buffer.length = control_buff_size; + get_cmd.feature_common.feature_id = feature_id; + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct efa_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (err) + ibdev_err(edev->efa_dev, + "Failed to submit get_feature command %d [%d]\n", + feature_id, err); + + return 0; +} + +static int efa_com_get_feature(struct efa_com_dev *edev, + struct efa_admin_get_feature_resp *get_resp, + enum efa_admin_aq_feature_id feature_id) +{ + return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); +} + +int efa_com_get_network_attr(struct efa_com_dev *edev, + struct efa_com_get_network_attr_result *result) +{ + struct efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to get network attributes %d\n", err); + return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + result->mtu = resp.u.network_attr.mtu; + + return 0; +} + +int efa_com_get_device_attr(struct efa_com_dev *edev, + struct efa_com_get_device_attr_result *result) +{ + struct efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n", + err); + return err; + } + + result->page_size_cap = resp.u.device_attr.page_size_cap; + result->fw_version = resp.u.device_attr.fw_version; + result->admin_api_version = resp.u.device_attr.admin_api_version; + result->device_version = resp.u.device_attr.device_version; + result->supported_features = resp.u.device_attr.supported_features; + result->phys_addr_width = resp.u.device_attr.phys_addr_width; + result->virt_addr_width = resp.u.device_attr.virt_addr_width; + result->db_bar = resp.u.device_attr.db_bar; + + if (result->admin_api_version < 1) { + ibdev_err(edev->efa_dev, + "Failed to get device attr api version [%u < 1]\n", + result->admin_api_version); + return -EINVAL; + } + + edev->supported_features = resp.u.device_attr.supported_features; + err = efa_com_get_feature(edev, &resp, + 
EFA_ADMIN_QUEUE_ATTR); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to get network attributes %d\n", err); + return err; + } + + result->max_qp = resp.u.queue_attr.max_qp; + result->max_sq_depth = resp.u.queue_attr.max_sq_depth; + result->max_rq_depth = resp.u.queue_attr.max_rq_depth; + result->max_cq = resp.u.queue_attr.max_cq; + result->max_cq_depth = resp.u.queue_attr.max_cq_depth; + result->inline_buf_size = resp.u.queue_attr.inline_buf_size; + result->max_sq_sge = resp.u.queue_attr.max_wr_send_sges; + result->max_rq_sge = resp.u.queue_attr.max_wr_recv_sges; + result->max_mr = resp.u.queue_attr.max_mr; + result->max_mr_pages = resp.u.queue_attr.max_mr_pages; + result->max_pd = resp.u.queue_attr.max_pd; + result->max_ah = resp.u.queue_attr.max_ah; + result->max_llq_size = resp.u.queue_attr.max_llq_size; + result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + + return 0; +} + +int efa_com_get_hw_hints(struct efa_com_dev *edev, + struct efa_com_get_hw_hints_result *result) +{ + struct efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err); + return err; + } + + result->admin_completion_timeout = resp.u.hw_hints.admin_completion_timeout; + result->driver_watchdog_timeout = resp.u.hw_hints.driver_watchdog_timeout; + result->mmio_read_timeout = resp.u.hw_hints.mmio_read_timeout; + result->poll_interval = resp.u.hw_hints.poll_interval; + + return 0; +} + +static int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct efa_com_admin_queue *aq; + int err; + + if (!efa_com_check_supported_feature_id(edev, feature_id)) { + ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", + feature_id); + return -EOPNOTSUPP; + } + + aq = &edev->aq; + + set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE; + if (control_buff_size) { + set_cmd->aq_common_descriptor.flags = + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + efa_com_set_dma_addr(control_buf_dma_addr, + &set_cmd->control_buffer.address.mem_addr_high, + &set_cmd->control_buffer.address.mem_addr_low); + } + + set_cmd->control_buffer.length = control_buff_size; + set_cmd->feature_common.feature_id = feature_id; + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)set_cmd, + sizeof(*set_cmd), + (struct efa_admin_acq_entry *)set_resp, + sizeof(*set_resp)); + + if (err) + ibdev_err(edev->efa_dev, + "Failed to submit set_feature command %d error: %d\n", + feature_id, err); + + return 0; +} + +static int efa_com_set_feature(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id) +{ + return efa_com_set_feature_ex(edev, set_resp, set_cmd, feature_id, + 0, 0); +} + +int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups) +{ + struct efa_admin_get_feature_resp get_resp; + struct efa_admin_set_feature_resp set_resp; + struct efa_admin_set_feature_cmd cmd = {}; + int err; + + ibdev_dbg(edev->efa_dev, "Configuring aenq with groups[%#x]\n", groups); + + err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n", + err); + return err; + } + + ibdev_dbg(edev->efa_dev, + "Get aenq groups: supported[%#x] 
enabled[%#x]\n", + get_resp.u.aenq.supported_groups, + get_resp.u.aenq.enabled_groups); + + if ((get_resp.u.aenq.supported_groups & groups) != groups) { + ibdev_err(edev->efa_dev, + "Trying to set unsupported aenq groups[%#x] supported[%#x]\n", + groups, get_resp.u.aenq.supported_groups); + return -EOPNOTSUPP; + } + + cmd.u.aenq.enabled_groups = groups; + err = efa_com_set_feature(edev, &set_resp, &cmd, + EFA_ADMIN_AENQ_CONFIG); + if (err) { + ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n", + err); + return err; + } + + return 0; +} + +int efa_com_alloc_pd(struct efa_com_dev *edev, + struct efa_com_alloc_pd_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_alloc_pd_cmd cmd = {}; + struct efa_admin_alloc_pd_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err); + return err; + } + + result->pdn = resp.pd; + + return 0; +} + +int efa_com_dealloc_pd(struct efa_com_dev *edev, + struct efa_com_dealloc_pd_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dealloc_pd_cmd cmd = {}; + struct efa_admin_dealloc_pd_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_PD; + cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n", + cmd.pd, err); + return err; + } + + return 0; +} + +int efa_com_alloc_uar(struct efa_com_dev *edev, + struct efa_com_alloc_uar_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_alloc_uar_cmd cmd = {}; + struct efa_admin_alloc_uar_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_UAR; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err); + return err; + } + + result->uarn = resp.uar; + + return 0; +} + +int efa_com_dealloc_uar(struct efa_com_dev *edev, + struct efa_com_dealloc_uar_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dealloc_uar_cmd cmd = {}; + struct efa_admin_dealloc_uar_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_UAR; + cmd.uar = params->uarn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n", + cmd.uar, err); + return err; + } + + return 0; +} diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h new file mode 100644 index 000000000000..a1174380462c --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_COM_CMD_H_ +#define _EFA_COM_CMD_H_ + +#include "efa_com.h" + +#define EFA_GID_SIZE 16 + +struct efa_com_create_qp_params { + u64 rq_base_addr; + u32 send_cq_idx; + u32 recv_cq_idx; + /* + * Send descriptor ring size in bytes, + * sufficient for user-provided number of WQEs and SGL size + */ + u32 sq_ring_size_in_bytes; + /* Max number of WQEs that will be posted on send queue */ + u32 sq_depth; + /* Recv descriptor ring size in bytes */ + u32 rq_ring_size_in_bytes; + u32 rq_depth; + u16 pd; + u16 uarn; + u8 qp_type; +}; + +struct efa_com_create_qp_result { + u32 qp_handle; + u32 qp_num; + u32 sq_db_offset; + u32 rq_db_offset; + u32 llq_descriptors_offset; + u16 send_sub_cq_idx; + u16 recv_sub_cq_idx; +}; + +struct efa_com_modify_qp_params { + u32 modify_mask; + u32 qp_handle; + u32 qp_state; + u32 cur_qp_state; + u32 qkey; + u32 sq_psn; + u8 sq_drained_async_notify; +}; + +struct efa_com_query_qp_params { + u32 qp_handle; +}; + +struct efa_com_query_qp_result { + u32 qp_state; + u32 qkey; + u32 sq_draining; + u32 sq_psn; +}; + +struct efa_com_destroy_qp_params { + u32 qp_handle; +}; + +struct efa_com_create_cq_params { + /* cq physical base address in OS memory */ + dma_addr_t dma_addr; + /* completion queue depth in # of entries */ + u16 cq_depth; + u16 num_sub_cqs; + u16 uarn; + u8 entry_size_in_bytes; +}; + +struct efa_com_create_cq_result { + /* cq identifier */ + u16 cq_idx; + /* actual cq depth in # of entries */ + u16 actual_depth; +}; + +struct efa_com_destroy_cq_params { + u16 cq_idx; +}; + +struct efa_com_create_ah_params { + u16 pdn; + /* Destination address in network byte order */ + u8 dest_addr[EFA_GID_SIZE]; +}; + +struct efa_com_create_ah_result { + u16 ah; +}; + +struct efa_com_destroy_ah_params { + u16 ah; + u16 pdn; +}; + +struct efa_com_get_network_attr_result { + u8 addr[EFA_GID_SIZE]; + u32 mtu; +}; + +struct efa_com_get_device_attr_result { + u64 page_size_cap; + u64 max_mr_pages; + u32 fw_version; + u32 admin_api_version; + u32 device_version; + u32 supported_features; + u32 phys_addr_width; + u32 virt_addr_width; + u32 max_qp; + u32 max_sq_depth; /* wqes */ + u32 max_rq_depth; /* wqes */ + u32 max_cq; + u32 max_cq_depth; /* cqes */ + u32 inline_buf_size; + u32 max_mr; + u32 max_pd; + u32 max_ah; + u32 max_llq_size; + u16 sub_cqs_per_cq; + u16 max_sq_sge; + u16 max_rq_sge; + u8 db_bar; +}; + +struct efa_com_get_hw_hints_result { + u16 mmio_read_timeout; + u16 driver_watchdog_timeout; + u16 admin_completion_timeout; + u16 poll_interval; + u32 reserved[4]; +}; + +struct efa_com_mem_addr { + u32 mem_addr_low; + u32 mem_addr_high; +}; + +/* Used at indirect mode page list chunks for chaining */ +struct efa_com_ctrl_buff_info { + /* indicates length of the buffer pointed by control_buffer_address. */ + u32 length; + /* points to control buffer (direct or indirect) */ + struct efa_com_mem_addr address; +}; + +struct efa_com_reg_mr_params { + /* Memory region length, in bytes. */ + u64 mr_length_in_bytes; + /* IO Virtual Address associated with this MR. */ + u64 iova; + /* words 8:15: Physical Buffer List, each element is page-aligned. */ + union { + /* + * Inline array of physical addresses of app pages + * (optimization for short region reservations) + */ + u64 inline_pbl_array[4]; + /* + * Describes the next physically contiguous chunk of indirect + * page list. A page list contains physical addresses of command + * data pages. Data pages are 4KB; page list chunks are + * variable-sized. 
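+ * Whether the inline array above or this chunk descriptor is used is
+ * selected via the inline_pbl flag below; efa_com_register_mr() sets the
+ * CTRL_DATA/CTRL_DATA_INDIRECT admin descriptor flags accordingly.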
+ */ + struct efa_com_ctrl_buff_info pbl; + } pbl; + /* number of pages in PBL (redundant, could be calculated) */ + u32 page_num; + /* Protection Domain */ + u16 pd; + /* + * phys_page_size_shift - page size is (1 << phys_page_size_shift) + * Page size is used for building the Virtual to Physical + * address mapping + */ + u8 page_shift; + /* + * permissions + * 0: local_write_enable - Write permissions: value of 1 needed + * for RQ buffers and for RDMA write:1: reserved1 - remote + * access flags, etc + */ + u8 permissions; + u8 inline_pbl; + u8 indirect; +}; + +struct efa_com_reg_mr_result { + /* + * To be used in conjunction with local buffers references in SQ and + * RQ WQE + */ + u32 l_key; + /* + * To be used in incoming RDMA semantics messages to refer to remotely + * accessed memory region + */ + u32 r_key; +}; + +struct efa_com_dereg_mr_params { + u32 l_key; +}; + +struct efa_com_alloc_pd_result { + u16 pdn; +}; + +struct efa_com_dealloc_pd_params { + u16 pdn; +}; + +struct efa_com_alloc_uar_result { + u16 uarn; +}; + +struct efa_com_dealloc_uar_params { + u16 uarn; +}; + +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); +int efa_com_create_qp(struct efa_com_dev *edev, + struct efa_com_create_qp_params *params, + struct efa_com_create_qp_result *res); +int efa_com_modify_qp(struct efa_com_dev *edev, + struct efa_com_modify_qp_params *params); +int efa_com_query_qp(struct efa_com_dev *edev, + struct efa_com_query_qp_params *params, + struct efa_com_query_qp_result *result); +int efa_com_destroy_qp(struct efa_com_dev *edev, + struct efa_com_destroy_qp_params *params); +int efa_com_create_cq(struct efa_com_dev *edev, + struct efa_com_create_cq_params *params, + struct efa_com_create_cq_result *result); +int efa_com_destroy_cq(struct efa_com_dev *edev, + struct efa_com_destroy_cq_params *params); +int efa_com_register_mr(struct efa_com_dev *edev, + struct efa_com_reg_mr_params *params, + struct efa_com_reg_mr_result *result); +int efa_com_dereg_mr(struct efa_com_dev *edev, + struct efa_com_dereg_mr_params *params); +int efa_com_create_ah(struct efa_com_dev *edev, + struct efa_com_create_ah_params *params, + struct efa_com_create_ah_result *result); +int efa_com_destroy_ah(struct efa_com_dev *edev, + struct efa_com_destroy_ah_params *params); +int efa_com_get_network_attr(struct efa_com_dev *edev, + struct efa_com_get_network_attr_result *result); +int efa_com_get_device_attr(struct efa_com_dev *edev, + struct efa_com_get_device_attr_result *result); +int efa_com_get_hw_hints(struct efa_com_dev *edev, + struct efa_com_get_hw_hints_result *result); +int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups); +int efa_com_alloc_pd(struct efa_com_dev *edev, + struct efa_com_alloc_pd_result *result); +int efa_com_dealloc_pd(struct efa_com_dev *edev, + struct efa_com_dealloc_pd_params *params); +int efa_com_alloc_uar(struct efa_com_dev *edev, + struct efa_com_alloc_uar_result *result); +int efa_com_dealloc_uar(struct efa_com_dev *edev, + struct efa_com_dealloc_uar_params *params); + +#endif /* _EFA_COM_CMD_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h new file mode 100644 index 000000000000..c559ec08898e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_common_defs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_COMMON_H_ +#define _EFA_COMMON_H_ + +#define EFA_COMMON_SPEC_VERSION_MAJOR 2 +#define EFA_COMMON_SPEC_VERSION_MINOR 0 + +struct efa_common_mem_addr { + u32 mem_addr_low; + + u32 mem_addr_high; +}; + +#endif /* _EFA_COMMON_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c new file mode 100644 index 000000000000..db974caf1eb1 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -0,0 +1,533 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include <rdma/ib_user_verbs.h> + +#include "efa.h" + +#define PCI_DEV_ID_EFA_VF 0xefa0 + +static const struct pci_device_id efa_pci_tbl[] = { + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { } +}; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_DEVICE_TABLE(pci, efa_pci_tbl); + +#define EFA_REG_BAR 0 +#define EFA_MEM_BAR 2 +#define EFA_BASE_BAR_MASK (BIT(EFA_REG_BAR) | BIT(EFA_MEM_BAR)) + +#define EFA_AENQ_ENABLED_GROUPS \ + (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ + BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) + +static void efa_update_network_attr(struct efa_dev *dev, + struct efa_com_get_network_attr_result *network_attr) +{ + memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); + dev->mtu = network_attr->mtu; + + dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); +} + +/* This handler will called for unknown event group or unimplemented handlers */ +static void unimplemented_aenq_handler(void *data, + struct efa_admin_aenq_entry *aenq_e) +{ + struct efa_dev *dev = (struct efa_dev *)data; + + ibdev_err(&dev->ibdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static void efa_keep_alive(void *data, struct efa_admin_aenq_entry *aenq_e) +{ + struct efa_dev *dev = (struct efa_dev *)data; + + atomic64_inc(&dev->stats.keep_alive_rcvd); +} + +static struct efa_aenq_handlers aenq_handlers = { + .handlers = { + [EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +static void efa_release_bars(struct efa_dev *dev, int bars_mask) +{ + struct pci_dev *pdev = dev->pdev; + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & bars_mask; + pci_release_selected_regions(pdev, release_bars); +} + +static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) +{ + struct efa_dev *dev = data; + + efa_com_admin_q_comp_intr_handler(&dev->edev); + efa_com_aenq_intr_handler(&dev->edev, data); + + return IRQ_HANDLED; +} + +static int efa_request_mgmnt_irq(struct efa_dev *dev) +{ + struct efa_irq *irq; + int err; + + irq = &dev->admin_irq; + err = request_irq(irq->vector, irq->handler, 0, irq->name, + irq->data); + if (err) { + dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", + err); + return err; + } + + dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", + nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector); + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return err; +} + +static void efa_setup_mgmnt_irq(struct efa_dev *dev) +{ + u32 cpu; + + snprintf(dev->admin_irq.name, EFA_IRQNAME_SIZE, + "efa-mgmnt@pci:%s", pci_name(dev->pdev)); + dev->admin_irq.handler = efa_intr_msix_mgmnt; + dev->admin_irq.data = dev; + dev->admin_irq.vector = + 
pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + cpu = cpumask_first(cpu_online_mask); + dev->admin_irq.cpu = cpu; + cpumask_set_cpu(cpu, + &dev->admin_irq.affinity_hint_mask); + dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n", + &dev->admin_irq, + dev->admin_irq.vector, + dev->admin_irq.name); +} + +static void efa_free_mgmnt_irq(struct efa_dev *dev) +{ + struct efa_irq *irq; + + irq = &dev->admin_irq; + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); +} + +static int efa_set_mgmnt_irq(struct efa_dev *dev) +{ + efa_setup_mgmnt_irq(dev); + + return efa_request_mgmnt_irq(dev); +} + +static int efa_request_doorbell_bar(struct efa_dev *dev) +{ + u8 db_bar_idx = dev->dev_attr.db_bar; + struct pci_dev *pdev = dev->pdev; + int bars; + int err; + + if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) { + bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx); + + err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (err) { + dev_err(&dev->pdev->dev, + "pci_request_selected_regions for bar %d failed %d\n", + db_bar_idx, err); + return err; + } + } + + dev->db_bar_addr = pci_resource_start(dev->pdev, db_bar_idx); + dev->db_bar_len = pci_resource_len(dev->pdev, db_bar_idx); + + return 0; +} + +static void efa_release_doorbell_bar(struct efa_dev *dev) +{ + if (!(BIT(dev->dev_attr.db_bar) & EFA_BASE_BAR_MASK)) + efa_release_bars(dev, BIT(dev->dev_attr.db_bar)); +} + +static void efa_update_hw_hints(struct efa_dev *dev, + struct efa_com_get_hw_hints_result *hw_hints) +{ + struct efa_com_dev *edev = &dev->edev; + + if (hw_hints->mmio_read_timeout) + edev->mmio_read.mmio_read_timeout = + hw_hints->mmio_read_timeout * 1000; + + if (hw_hints->poll_interval) + edev->aq.poll_interval = hw_hints->poll_interval; + + if (hw_hints->admin_completion_timeout) + edev->aq.completion_timeout = + hw_hints->admin_completion_timeout; +} + +static void efa_stats_init(struct efa_dev *dev) +{ + atomic64_t *s = (atomic64_t *)&dev->stats; + int i; + + for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +static const struct ib_device_ops efa_dev_ops = { + .alloc_pd = efa_alloc_pd, + .alloc_ucontext = efa_alloc_ucontext, + .create_ah = efa_create_ah, + .create_cq = efa_create_cq, + .create_qp = efa_create_qp, + .dealloc_pd = efa_dealloc_pd, + .dealloc_ucontext = efa_dealloc_ucontext, + .dereg_mr = efa_dereg_mr, + .destroy_ah = efa_destroy_ah, + .destroy_cq = efa_destroy_cq, + .destroy_qp = efa_destroy_qp, + .get_link_layer = efa_port_link_layer, + .get_port_immutable = efa_get_port_immutable, + .mmap = efa_mmap, + .modify_qp = efa_modify_qp, + .query_device = efa_query_device, + .query_gid = efa_query_gid, + .query_pkey = efa_query_pkey, + .query_port = efa_query_port, + .query_qp = efa_query_qp, + .reg_user_mr = efa_reg_mr, + + INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext), +}; + +static int efa_ib_device_add(struct efa_dev *dev) +{ + struct efa_com_get_network_attr_result network_attr; + struct efa_com_get_hw_hints_result hw_hints; + struct pci_dev *pdev = dev->pdev; + int err; + + efa_stats_init(dev); + + err = efa_com_get_device_attr(&dev->edev, &dev->dev_attr); + if (err) + return err; + + dev_dbg(&dev->pdev->dev, "Doorbells bar (%d)\n", dev->dev_attr.db_bar); + err = efa_request_doorbell_bar(dev); + if (err) + return err; + + err = efa_com_get_network_attr(&dev->edev, &network_attr); + if (err) + goto 
err_release_doorbell_bar; + + efa_update_network_attr(dev, &network_attr); + + err = efa_com_get_hw_hints(&dev->edev, &hw_hints); + if (err) + goto err_release_doorbell_bar; + + efa_update_hw_hints(dev, &hw_hints); + + /* Try to enable all the available aenq groups */ + err = efa_com_set_aenq_config(&dev->edev, EFA_AENQ_ENABLED_GROUPS); + if (err) + goto err_release_doorbell_bar; + + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dev.parent = &pdev->dev; + dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION; + + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH); + + dev->ibdev.uverbs_ex_cmd_mask = + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); + + dev->ibdev.driver_id = RDMA_DRIVER_EFA; + ib_set_device_ops(&dev->ibdev, &efa_dev_ops); + + err = ib_register_device(&dev->ibdev, "efa_%d"); + if (err) + goto err_release_doorbell_bar; + + ibdev_info(&dev->ibdev, "IB device registered\n"); + + return 0; + +err_release_doorbell_bar: + efa_release_doorbell_bar(dev); + return err; +} + +static void efa_ib_device_remove(struct efa_dev *dev) +{ + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); + ibdev_info(&dev->ibdev, "Unregister ib device\n"); + ib_unregister_device(&dev->ibdev); + efa_release_doorbell_bar(dev); +} + +static void efa_disable_msix(struct efa_dev *dev) +{ + pci_free_irq_vectors(dev->pdev); +} + +static int efa_enable_msix(struct efa_dev *dev) +{ + int msix_vecs, irq_num; + + /* Reserve the max msix vectors we might need */ + msix_vecs = EFA_NUM_MSIX_VEC; + dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", + msix_vecs); + + dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX; + irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs, + msix_vecs, PCI_IRQ_MSIX); + + if (irq_num < 0) { + dev_err(&dev->pdev->dev, "Failed to enable MSI-X. 
irq_num %d\n", + irq_num); + return -ENOSPC; + } + + if (irq_num != msix_vecs) { + dev_err(&dev->pdev->dev, + "Allocated %d MSI-X (out of %d requested)\n", + irq_num, msix_vecs); + return -ENOSPC; + } + + return 0; +} + +static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) +{ + int dma_width; + int err; + + err = efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL); + if (err) + return err; + + err = efa_com_validate_version(edev); + if (err) + return err; + + dma_width = efa_com_get_dma_width(edev); + if (dma_width < 0) { + err = dma_width; + return err; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (err) { + dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); + return err; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (err) { + dev_err(&pdev->dev, + "err_pci_set_consistent_dma_mask failed %d\n", + err); + return err; + } + + return 0; +} + +static struct efa_dev *efa_probe_device(struct pci_dev *pdev) +{ + struct efa_com_dev *edev; + struct efa_dev *dev; + int bars; + int err; + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return ERR_PTR(err); + } + + pci_set_master(pdev); + + dev = ib_alloc_device(efa_dev, ibdev); + if (!dev) { + dev_err(&pdev->dev, "Device alloc failed\n"); + err = -ENOMEM; + goto err_disable_device; + } + + pci_set_drvdata(pdev, dev); + edev = &dev->edev; + edev->efa_dev = dev; + edev->dmadev = &pdev->dev; + dev->pdev = pdev; + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; + err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (err) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + err); + goto err_ibdev_destroy; + } + + dev->reg_bar_addr = pci_resource_start(pdev, EFA_REG_BAR); + dev->reg_bar_len = pci_resource_len(pdev, EFA_REG_BAR); + dev->mem_bar_addr = pci_resource_start(pdev, EFA_MEM_BAR); + dev->mem_bar_len = pci_resource_len(pdev, EFA_MEM_BAR); + + edev->reg_bar = devm_ioremap(&pdev->dev, + dev->reg_bar_addr, + dev->reg_bar_len); + if (!edev->reg_bar) { + dev_err(&pdev->dev, "Failed to remap register bar\n"); + err = -EFAULT; + goto err_release_bars; + } + + err = efa_com_mmio_reg_read_init(edev); + if (err) { + dev_err(&pdev->dev, "Failed to init readless MMIO\n"); + goto err_iounmap; + } + + err = efa_device_init(edev, pdev); + if (err) { + dev_err(&pdev->dev, "EFA device init failed\n"); + if (err == -ETIME) + err = -EPROBE_DEFER; + goto err_reg_read_destroy; + } + + err = efa_enable_msix(dev); + if (err) + goto err_reg_read_destroy; + + edev->aq.msix_vector_idx = dev->admin_msix_vector_idx; + edev->aenq.msix_vector_idx = dev->admin_msix_vector_idx; + + err = efa_set_mgmnt_irq(dev); + if (err) + goto err_disable_msix; + + err = efa_com_admin_init(edev, &aenq_handlers); + if (err) + goto err_free_mgmnt_irq; + + return dev; + +err_free_mgmnt_irq: + efa_free_mgmnt_irq(dev); +err_disable_msix: + efa_disable_msix(dev); +err_reg_read_destroy: + efa_com_mmio_reg_read_destroy(edev); +err_iounmap: + devm_iounmap(&pdev->dev, edev->reg_bar); +err_release_bars: + efa_release_bars(dev, EFA_BASE_BAR_MASK); +err_ibdev_destroy: + ib_dealloc_device(&dev->ibdev); +err_disable_device: + pci_disable_device(pdev); + return ERR_PTR(err); +} + +static void efa_remove_device(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + struct efa_com_dev *edev; + + edev = &dev->edev; + efa_com_admin_destroy(edev); + efa_free_mgmnt_irq(dev); + efa_disable_msix(dev); + 
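/*
+ * The rest of the teardown mirrors efa_probe_device() in reverse: destroy
+ * the readless MMIO support, unmap and release the BARs, free the
+ * ib_device and finally disable the PCI device.
+ */
+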
efa_com_mmio_reg_read_destroy(edev); + devm_iounmap(&pdev->dev, edev->reg_bar); + efa_release_bars(dev, EFA_BASE_BAR_MASK); + ib_dealloc_device(&dev->ibdev); + pci_disable_device(pdev); +} + +static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct efa_dev *dev; + int err; + + dev = efa_probe_device(pdev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + err = efa_ib_device_add(dev); + if (err) + goto err_remove_device; + + return 0; + +err_remove_device: + efa_remove_device(pdev); + return err; +} + +static void efa_remove(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + + efa_ib_device_remove(dev); + efa_remove_device(pdev); +} + +static struct pci_driver efa_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = efa_pci_tbl, + .probe = efa_probe, + .remove = efa_remove, +}; + +module_pci_driver(efa_pci_driver); diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h new file mode 100644 index 000000000000..bb9cad3d6a15 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_REGS_H_ +#define _EFA_REGS_H_ + +enum efa_regs_reset_reason_types { + EFA_REGS_RESET_NORMAL = 0, + /* Keep alive timeout */ + EFA_REGS_RESET_KEEP_ALIVE_TO = 1, + EFA_REGS_RESET_ADMIN_TO = 2, + EFA_REGS_RESET_INIT_ERR = 3, + EFA_REGS_RESET_DRIVER_INVALID_STATE = 4, + EFA_REGS_RESET_OS_TRIGGER = 5, + EFA_REGS_RESET_SHUTDOWN = 6, + EFA_REGS_RESET_USER_TRIGGER = 7, + EFA_REGS_RESET_GENERIC = 8, +}; + +/* efa_registers offsets */ + +/* 0 base */ +#define EFA_REGS_VERSION_OFF 0x0 +#define EFA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define EFA_REGS_CAPS_OFF 0x8 +#define EFA_REGS_AQ_BASE_LO_OFF 0x10 +#define EFA_REGS_AQ_BASE_HI_OFF 0x14 +#define EFA_REGS_AQ_CAPS_OFF 0x18 +#define EFA_REGS_ACQ_BASE_LO_OFF 0x20 +#define EFA_REGS_ACQ_BASE_HI_OFF 0x24 +#define EFA_REGS_ACQ_CAPS_OFF 0x28 +#define EFA_REGS_AQ_PROD_DB_OFF 0x2c +#define EFA_REGS_AENQ_CAPS_OFF 0x34 +#define EFA_REGS_AENQ_BASE_LO_OFF 0x38 +#define EFA_REGS_AENQ_BASE_HI_OFF 0x3c +#define EFA_REGS_AENQ_CONS_DB_OFF 0x40 +#define EFA_REGS_INTR_MASK_OFF 0x4c +#define EFA_REGS_DEV_CTL_OFF 0x54 +#define EFA_REGS_DEV_STS_OFF 0x58 +#define EFA_REGS_MMIO_REG_READ_OFF 0x5c +#define EFA_REGS_MMIO_RESP_LO_OFF 0x60 +#define EFA_REGS_MMIO_RESP_HI_OFF 0x64 + +/* version register */ +#define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 +#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 +#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 + +/* aq_caps register */ +#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff 
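+/*
+ * Field decode illustration: each field in these registers is described by
+ * a _SHIFT/_MASK pair, so a value is extracted as, e.g.:
+ *
+ *   aq_depth   = aq_caps & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+ *   entry_size = (aq_caps & EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK) >>
+ *                EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT;
+ *
+ * Fields that start at bit 0 (such as AQ_DEPTH) have no shift macro.
+ */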
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000 +#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24 +#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000 + +/* aenq_caps register */ +#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000 +#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24 +#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000 + +/* dev_ctl register */ +#define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 +#define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 + +/* dev_sts register */ +#define EFA_REGS_DEV_STS_READY_MASK 0x1 +#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 + +/* mmio_reg_read register */ +#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +#endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c new file mode 100644 index 000000000000..6d6886c9009f --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -0,0 +1,1825 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#include <linux/vmalloc.h> + +#include <rdma/ib_addr.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_ioctl.h> + +#include "efa.h" + +#define EFA_MMAP_FLAG_SHIFT 56 +#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) +#define EFA_MMAP_INVALID U64_MAX + +enum { + EFA_MMAP_DMA_PAGE = 0, + EFA_MMAP_IO_WC, + EFA_MMAP_IO_NC, +}; + +#define EFA_AENQ_ENABLED_GROUPS \ + (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ + BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) + +struct efa_mmap_entry { + void *obj; + u64 address; + u64 length; + u32 mmap_page; + u8 mmap_flag; +}; + +static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) +{ + return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | + ((u64)efa->mmap_page << PAGE_SHIFT); +} + +#define EFA_CHUNK_PAYLOAD_SHIFT 12 +#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT) +#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8 + +#define EFA_CHUNK_SHIFT 12 +#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT) +#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info) + +#define EFA_PTRS_PER_CHUNK \ + ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE) + +#define EFA_CHUNK_USED_SIZE \ + ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) + +#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE + +struct pbl_chunk { + dma_addr_t dma_addr; + u64 *buf; + u32 length; +}; + +struct pbl_chunk_list { + struct pbl_chunk *chunks; + unsigned int size; +}; + +struct pbl_context { + union { + struct { + dma_addr_t dma_addr; + } continuous; + struct { + u32 pbl_buf_size_in_pages; + struct scatterlist *sgl; + int sg_dma_cnt; + struct pbl_chunk_list chunk_list; + } indirect; + } phys; + u64 *pbl_buf; + u32 pbl_buf_size_in_bytes; + u8 physically_continuous; +}; + +static inline struct efa_dev *to_edev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct efa_dev, ibdev); +} + +static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct efa_ucontext, ibucontext); +} + +static inline struct efa_pd *to_epd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct efa_pd, ibpd); +} + +static inline struct efa_mr *to_emr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct efa_mr, ibmr); +} + +static inline struct efa_qp *to_eqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct efa_qp, ibqp); +} + +static inline struct efa_cq *to_ecq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct efa_cq, ibcq); +} + +static inline struct efa_ah *to_eah(struct ib_ah *ibah) +{ + return container_of(ibah, struct efa_ah, ibah); +} + +#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ + sizeof(((typeof(x) *)0)->fld) <= (sz)) + +#define is_reserved_cleared(reserved) \ + !memchr_inv(reserved, 0, sizeof(reserved)) + +static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, + size_t size, enum dma_data_direction dir) +{ + void *addr; + + addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (!addr) + return NULL; + + *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir); + if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) { + ibdev_err(&dev->ibdev, "Failed to map DMA address\n"); + free_pages_exact(addr, size); + return NULL; + } + + return addr; +} + +/* + * This is only called when the ucontext is destroyed and there can be no + * concurrent query via mmap or allocate on the xarray, thus we can be sure no + * other 
thread is using the entry pointer. We also know that all the BAR + * pages have either been zap'd or munmaped at this point. Normal pages are + * refcounted and will be freed at the proper time. + */ +static void mmap_entries_remove_free(struct efa_dev *dev, + struct efa_ucontext *ucontext) +{ + struct efa_mmap_entry *entry; + unsigned long mmap_page; + + xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { + xa_erase(&ucontext->mmap_xa, mmap_page); + + ibdev_dbg( + &dev->ibdev, + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", + entry->obj, get_mmap_key(entry), entry->address, + entry->length); + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + /* DMA mapping is already gone, now free the pages */ + free_pages_exact(phys_to_virt(entry->address), + entry->length); + kfree(entry); + } +} + +static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, + struct efa_ucontext *ucontext, + u64 key, u64 len) +{ + struct efa_mmap_entry *entry; + u64 mmap_page; + + mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; + if (mmap_page > U32_MAX) + return NULL; + + entry = xa_load(&ucontext->mmap_xa, mmap_page); + if (!entry || get_mmap_key(entry) != key || entry->length != len) + return NULL; + + ibdev_dbg(&dev->ibdev, + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", + entry->obj, key, entry->address, entry->length); + + return entry; +} + +/* + * Note this locking scheme cannot support removal of entries, except during + * ucontext destruction when the core code guarentees no concurrency. + */ +static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, + void *obj, u64 address, u64 length, u8 mmap_flag) +{ + struct efa_mmap_entry *entry; + int err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return EFA_MMAP_INVALID; + + entry->obj = obj; + entry->address = address; + entry->length = length; + entry->mmap_flag = mmap_flag; + + xa_lock(&ucontext->mmap_xa); + entry->mmap_page = ucontext->mmap_xa_page; + ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE); + err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, + GFP_KERNEL); + xa_unlock(&ucontext->mmap_xa); + if (err){ + kfree(entry); + return EFA_MMAP_INVALID; + } + + ibdev_dbg( + &dev->ibdev, + "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", + entry->obj, entry->address, entry->length, get_mmap_key(entry)); + + return get_mmap_key(entry); +} + +int efa_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata) +{ + struct efa_com_get_device_attr_result *dev_attr; + struct efa_ibv_ex_query_device_resp resp = {}; + struct efa_dev *dev = to_edev(ibdev); + int err; + + if (udata && udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, udata not cleared\n"); + return -EINVAL; + } + + dev_attr = &dev->dev_attr; + + memset(props, 0, sizeof(*props)); + props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE; + props->page_size_cap = dev_attr->page_size_cap; + props->vendor_id = dev->pdev->vendor; + props->vendor_part_id = dev->pdev->device; + props->hw_ver = dev->pdev->subsystem_device; + props->max_qp = dev_attr->max_qp; + props->max_cq = dev_attr->max_cq; + props->max_pd = dev_attr->max_pd; + props->max_mr = dev_attr->max_mr; + props->max_ah = dev_attr->max_ah; + props->max_cqe = dev_attr->max_cq_depth; + props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth, + dev_attr->max_rq_depth); + props->max_send_sge = dev_attr->max_sq_sge; + props->max_recv_sge = 
dev_attr->max_rq_sge; + + if (udata && udata->outlen) { + resp.max_sq_sge = dev_attr->max_sq_sge; + resp.max_rq_sge = dev_attr->max_rq_sge; + resp.max_sq_wr = dev_attr->max_sq_depth; + resp.max_rq_wr = dev_attr->max_rq_depth; + + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy udata for query_device\n"); + return err; + } + } + + return 0; +} + +int efa_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct efa_dev *dev = to_edev(ibdev); + + props->lmc = 1; + + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->active_speed = IB_SPEED_EDR; + props->active_width = IB_WIDTH_4X; + props->max_mtu = ib_mtu_int_to_enum(dev->mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->mtu); + props->max_msg_sz = dev->mtu; + props->max_vl_num = 1; + + return 0; +} + +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct efa_dev *dev = to_edev(ibqp->device); + struct efa_com_query_qp_params params = {}; + struct efa_com_query_qp_result result; + struct efa_qp *qp = to_eqp(ibqp); + int err; + +#define EFA_QUERY_QP_SUPP_MASK \ + (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + + if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp_attr_mask[%#x] supported[%#x]\n", + qp_attr_mask, EFA_QUERY_QP_SUPP_MASK); + return -EOPNOTSUPP; + } + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + params.qp_handle = qp->qp_handle; + err = efa_com_query_qp(&dev->edev, ¶ms, &result); + if (err) + return err; + + qp_attr->qp_state = result.qp_state; + qp_attr->qkey = result.qkey; + qp_attr->sq_psn = result.sq_psn; + qp_attr->sq_draining = result.sq_draining; + qp_attr->port_num = 1; + + qp_attr->cap.max_send_wr = qp->max_send_wr; + qp_attr->cap.max_recv_wr = qp->max_recv_wr; + qp_attr->cap.max_send_sge = qp->max_send_sge; + qp_attr->cap.max_recv_sge = qp->max_recv_sge; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->recv_cq = ibqp->recv_cq; + qp_init_attr->send_cq = ibqp->send_cq; + qp_init_attr->qp_context = ibqp->qp_context; + qp_init_attr->cap = qp_attr->cap; + + return 0; +} + +int efa_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct efa_dev *dev = to_edev(ibdev); + + memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + + return 0; +} + +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + if (index > 0) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn) +{ + struct efa_com_dealloc_pd_params params = { + .pdn = pdn, + }; + + return efa_com_dealloc_pd(&dev->edev, ¶ms); +} + +int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_ibv_alloc_pd_resp resp = {}; + struct efa_com_alloc_pd_result result; + struct efa_pd *pd = to_epd(ibpd); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + err = -EINVAL; + goto err_out; + } + + err = efa_com_alloc_pd(&dev->edev, &result); + if (err) + goto err_out; + + pd->pdn = result.pdn; + resp.pdn = result.pdn; + + if (udata->outlen) { + err = 
ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for alloc_pd\n"); + goto err_dealloc_pd; + } + } + + ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn); + + return 0; + +err_dealloc_pd: + efa_pd_dealloc(dev, result.pdn); +err_out: + atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + return err; +} + +void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_pd *pd = to_epd(ibpd); + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return; + } + + ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); + efa_pd_dealloc(dev, pd->pdn); +} + +static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) +{ + struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle }; + + return efa_com_destroy_qp(&dev->edev, ¶ms); +} + +int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibqp->pd->device); + struct efa_qp *qp = to_eqp(ibqp); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); + err = efa_destroy_qp_handle(dev, qp->qp_handle); + if (err) + return err; + + if (qp->rq_cpu_addr) { + ibdev_dbg(&dev->ibdev, + "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", + qp->rq_cpu_addr, qp->rq_size, + &qp->rq_dma_addr); + dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, + DMA_TO_DEVICE); + } + + kfree(qp); + return 0; +} + +static int qp_mmap_entries_setup(struct efa_qp *qp, + struct efa_dev *dev, + struct efa_ucontext *ucontext, + struct efa_com_create_qp_params *params, + struct efa_ibv_create_qp_resp *resp) +{ + /* + * Once an entry is inserted it might be mmapped, hence cannot be + * cleaned up until dealloc_ucontext. 
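+ * If a later step of QP creation fails, entries that were already inserted
+ * are not rolled back here; they are reclaimed together with the rest of
+ * the xarray in mmap_entries_remove_free() when the ucontext goes away.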
+ */ + resp->sq_db_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->db_bar_addr + resp->sq_db_offset, + PAGE_SIZE, EFA_MMAP_IO_NC); + if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->sq_db_offset &= ~PAGE_MASK; + + resp->llq_desc_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->mem_bar_addr + resp->llq_desc_offset, + PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)), + EFA_MMAP_IO_WC); + if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->llq_desc_offset &= ~PAGE_MASK; + + if (qp->rq_size) { + resp->rq_db_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->db_bar_addr + resp->rq_db_offset, + PAGE_SIZE, EFA_MMAP_IO_NC); + if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->rq_db_offset &= ~PAGE_MASK; + + resp->rq_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + virt_to_phys(qp->rq_cpu_addr), + qp->rq_size, EFA_MMAP_DMA_PAGE); + if (resp->rq_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->rq_mmap_size = qp->rq_size; + } + + return 0; +} + +static int efa_qp_validate_cap(struct efa_dev *dev, + struct ib_qp_init_attr *init_attr) +{ + if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) { + ibdev_dbg(&dev->ibdev, + "qp: requested send wr[%u] exceeds the max[%u]\n", + init_attr->cap.max_send_wr, + dev->dev_attr.max_sq_depth); + return -EINVAL; + } + if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) { + ibdev_dbg(&dev->ibdev, + "qp: requested receive wr[%u] exceeds the max[%u]\n", + init_attr->cap.max_recv_wr, + dev->dev_attr.max_rq_depth); + return -EINVAL; + } + if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) { + ibdev_dbg(&dev->ibdev, + "qp: requested sge send[%u] exceeds the max[%u]\n", + init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge); + return -EINVAL; + } + if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) { + ibdev_dbg(&dev->ibdev, + "qp: requested sge recv[%u] exceeds the max[%u]\n", + init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge); + return -EINVAL; + } + if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) { + ibdev_dbg(&dev->ibdev, + "qp: requested inline data[%u] exceeds the max[%u]\n", + init_attr->cap.max_inline_data, + dev->dev_attr.inline_buf_size); + return -EINVAL; + } + + return 0; +} + +static int efa_qp_validate_attr(struct efa_dev *dev, + struct ib_qp_init_attr *init_attr) +{ + if (init_attr->qp_type != IB_QPT_DRIVER && + init_attr->qp_type != IB_QPT_UD) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp type %d\n", init_attr->qp_type); + return -EOPNOTSUPP; + } + + if (init_attr->srq) { + ibdev_dbg(&dev->ibdev, "SRQ is not supported\n"); + return -EOPNOTSUPP; + } + + if (init_attr->create_flags) { + ibdev_dbg(&dev->ibdev, "Unsupported create flags\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct efa_com_create_qp_params create_qp_params = {}; + struct efa_com_create_qp_result create_qp_resp; + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_ibv_create_qp_resp resp = {}; + struct efa_ibv_create_qp cmd = {}; + bool rq_entry_inserted = false; + struct efa_ucontext *ucontext; + struct efa_qp *qp; + int err; + + ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, + ibucontext); + + err = efa_qp_validate_cap(dev, init_attr); + if (err) + goto err_out; + + err = efa_qp_validate_attr(dev, init_attr); + if (err) + goto err_out; + + if 
(!field_avail(cmd, driver_qp_type, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, no input udata\n"); + err = -EINVAL; + goto err_out; + } + + if (udata->inlen > sizeof(cmd) && + !ib_is_udata_cleared(udata, sizeof(cmd), + udata->inlen - sizeof(cmd))) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for create_qp\n"); + goto err_out; + } + + if (cmd.comp_mask) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto err_out; + } + + create_qp_params.uarn = ucontext->uarn; + create_qp_params.pd = to_epd(ibpd)->pdn; + + if (init_attr->qp_type == IB_QPT_UD) { + create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD; + } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) { + create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD; + } else { + ibdev_dbg(&dev->ibdev, + "Unsupported qp type %d driver qp type %d\n", + init_attr->qp_type, cmd.driver_qp_type); + err = -EOPNOTSUPP; + goto err_free_qp; + } + + ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n", + init_attr->qp_type, cmd.driver_qp_type); + create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx; + create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx; + create_qp_params.sq_depth = init_attr->cap.max_send_wr; + create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size; + + create_qp_params.rq_depth = init_attr->cap.max_recv_wr; + create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size; + qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes); + if (qp->rq_size) { + qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); + if (!qp->rq_cpu_addr) { + err = -ENOMEM; + goto err_free_qp; + } + + ibdev_dbg(&dev->ibdev, + "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n", + qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); + create_qp_params.rq_base_addr = qp->rq_dma_addr; + } + + err = efa_com_create_qp(&dev->edev, &create_qp_params, + &create_qp_resp); + if (err) + goto err_free_mapped; + + resp.sq_db_offset = create_qp_resp.sq_db_offset; + resp.rq_db_offset = create_qp_resp.rq_db_offset; + resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset; + resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx; + resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx; + + err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params, + &resp); + if (err) + goto err_destroy_qp; + + rq_entry_inserted = true; + qp->qp_handle = create_qp_resp.qp_handle; + qp->ibqp.qp_num = create_qp_resp.qp_num; + qp->ibqp.qp_type = init_attr->qp_type; + qp->max_send_wr = init_attr->cap.max_send_wr; + qp->max_recv_wr = init_attr->cap.max_recv_wr; + qp->max_send_sge = init_attr->cap.max_send_sge; + qp->max_recv_sge = init_attr->cap.max_recv_sge; + qp->max_inline_data = init_attr->cap.max_inline_data; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for qp[%u]\n", + create_qp_resp.qp_num); + goto err_destroy_qp; + } + } + + ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); + + return &qp->ibqp; + +err_destroy_qp: + efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); +err_free_mapped: + if 
(qp->rq_size) { + dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, + DMA_TO_DEVICE); + if (!rq_entry_inserted) + free_pages_exact(qp->rq_cpu_addr, qp->rq_size); + } +err_free_qp: + kfree(qp); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_qp_err); + return ERR_PTR(err); +} + +static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, + struct ib_qp_attr *qp_attr, int qp_attr_mask, + enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ +#define EFA_MODIFY_QP_SUPP_MASK \ + (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + + if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp_attr_mask[%#x] supported[%#x]\n", + qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK); + return -EOPNOTSUPP; + } + + if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, + qp_attr_mask)) { + ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); + return -EINVAL; + } + + if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) { + ibdev_dbg(&dev->ibdev, "Can't change port num\n"); + return -EOPNOTSUPP; + } + + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) { + ibdev_dbg(&dev->ibdev, "Can't change pkey index\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibqp->device); + struct efa_com_modify_qp_params params = {}; + struct efa_qp *qp = to_eqp(ibqp); + enum ib_qp_state cur_state; + enum ib_qp_state new_state; + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + return -EINVAL; + } + + cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state : + qp->state; + new_state = qp_attr_mask & IB_QP_STATE ? 
qp_attr->qp_state : cur_state; + + err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state, + new_state); + if (err) + return err; + + params.qp_handle = qp->qp_handle; + + if (qp_attr_mask & IB_QP_STATE) { + params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | + BIT(EFA_ADMIN_CUR_QP_STATE_BIT); + params.cur_qp_state = qp_attr->cur_qp_state; + params.qp_state = qp_attr->qp_state; + } + + if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { + params.modify_mask |= + BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; + } + + if (qp_attr_mask & IB_QP_QKEY) { + params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + params.qkey = qp_attr->qkey; + } + + if (qp_attr_mask & IB_QP_SQ_PSN) { + params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + params.sq_psn = qp_attr->sq_psn; + } + + err = efa_com_modify_qp(&dev->edev, ¶ms); + if (err) + return err; + + qp->state = new_state; + + return 0; +} + +static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) +{ + struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx }; + + return efa_com_destroy_cq(&dev->edev, ¶ms); +} + +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibcq->device); + struct efa_cq *cq = to_ecq(ibcq); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, + "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", + cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); + + err = efa_destroy_cq_idx(dev, cq->cq_idx); + if (err) + return err; + + dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + + kfree(cq); + return 0; +} + +static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, + struct efa_ibv_create_cq_resp *resp) +{ + resp->q_mmap_size = cq->size; + resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE); + if (resp->q_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + return 0; +} + +static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *ibucontext, + struct ib_udata *udata) +{ + struct efa_ibv_create_cq_resp resp = {}; + struct efa_com_create_cq_params params; + struct efa_com_create_cq_result result; + struct efa_dev *dev = to_edev(ibdev); + struct efa_ibv_create_cq cmd = {}; + bool cq_entry_inserted = false; + struct efa_cq *cq; + int err; + + ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { + ibdev_dbg(ibdev, + "cq: requested entries[%u] non-positive or greater than max[%u]\n", + entries, dev->dev_attr.max_cq_depth); + err = -EINVAL; + goto err_out; + } + + if (!field_avail(cmd, num_sub_cqs, udata->inlen)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, no input udata\n"); + err = -EINVAL; + goto err_out; + } + + if (udata->inlen > sizeof(cmd) && + !ib_is_udata_cleared(udata, sizeof(cmd), + udata->inlen - sizeof(cmd))) { + ibdev_dbg(ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n"); + goto err_out; + } + + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; 
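+ /* Unknown comp_mask bits or non-zero reserved space mean user space
+ * is newer than this driver, so reject the request.
+ */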
+ goto err_out; + } + + if (!cmd.cq_entry_size) { + ibdev_dbg(ibdev, + "Invalid entry size [%u]\n", cmd.cq_entry_size); + err = -EINVAL; + goto err_out; + } + + if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) { + ibdev_dbg(ibdev, + "Invalid number of sub cqs[%u] expected[%u]\n", + cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq); + err = -EINVAL; + goto err_out; + } + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + err = -ENOMEM; + goto err_out; + } + + cq->ucontext = to_eucontext(ibucontext); + cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs); + cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + if (!cq->cpu_addr) { + err = -ENOMEM; + goto err_free_cq; + } + + params.uarn = cq->ucontext->uarn; + params.cq_depth = entries; + params.dma_addr = cq->dma_addr; + params.entry_size_in_bytes = cmd.cq_entry_size; + params.num_sub_cqs = cmd.num_sub_cqs; + err = efa_com_create_cq(&dev->edev, ¶ms, &result); + if (err) + goto err_free_mapped; + + resp.cq_idx = result.cq_idx; + cq->cq_idx = result.cq_idx; + cq->ibcq.cqe = result.actual_depth; + WARN_ON_ONCE(entries != result.actual_depth); + + err = cq_mmap_entries_setup(dev, cq, &resp); + if (err) { + ibdev_dbg(ibdev, + "Could not setup cq[%u] mmap entries\n", cq->cq_idx); + goto err_destroy_cq; + } + + cq_entry_inserted = true; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy udata for create_cq\n"); + goto err_destroy_cq; + } + } + + ibdev_dbg(ibdev, + "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n", + cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr); + + return &cq->ibcq; + +err_destroy_cq: + efa_destroy_cq_idx(dev, cq->cq_idx); +err_free_mapped: + dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + if (!cq_entry_inserted) + free_pages_exact(cq->cpu_addr, cq->size); +err_free_cq: + kfree(cq); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_cq_err); + return ERR_PTR(err); +} + +struct ib_cq *efa_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, + ibucontext); + + return do_create_cq(ibdev, attr->cqe, attr->comp_vector, + &ucontext->ibucontext, udata); +} + +static int umem_to_page_list(struct efa_dev *dev, + struct ib_umem *umem, + u64 *page_list, + u32 hp_cnt, + u8 hp_shift) +{ + u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); + struct sg_dma_page_iter sg_iter; + unsigned int page_idx = 0; + unsigned int hp_idx = 0; + + ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", + hp_cnt, pages_in_hp); + + for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + if (page_idx % pages_in_hp == 0) { + page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter); + hp_idx++; + } + + page_idx++; + } + + return 0; +} + +static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) +{ + struct scatterlist *sglist; + struct page *pg; + int i; + + sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + if (!sglist) + return NULL; + sg_init_table(sglist, page_cnt); + for (i = 0; i < page_cnt; i++) { + pg = vmalloc_to_page(buf); + if (!pg) + goto err; + sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); + buf += PAGE_SIZE / sizeof(*buf); + } + return sglist; + +err: + kfree(sglist); + return NULL; +} + +/* + * create a chunk list of physical pages dma addresses from the supplied + * scatter gather 
list + */ +static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl) +{ + unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx; + struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; + int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages; + struct scatterlist *pages_sgl = pbl->phys.indirect.sgl; + int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt; + struct efa_com_ctrl_buff_info *ctrl_buf; + u64 *cur_chunk_buf, *prev_chunk_buf; + struct scatterlist *sg; + dma_addr_t dma_addr; + int i; + + /* allocate a chunk list that consists of 4KB chunks */ + chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK); + + chunk_list->size = chunk_list_size; + chunk_list->chunks = kcalloc(chunk_list_size, + sizeof(*chunk_list->chunks), + GFP_KERNEL); + if (!chunk_list->chunks) + return -ENOMEM; + + ibdev_dbg(&dev->ibdev, + "chunk_list_size[%u] - pages[%u]\n", chunk_list_size, + page_cnt); + + /* allocate chunk buffers: */ + for (i = 0; i < chunk_list_size; i++) { + chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL); + if (!chunk_list->chunks[i].buf) + goto chunk_list_dealloc; + + chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE; + } + chunk_list->chunks[chunk_list_size - 1].length = + ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) + + EFA_CHUNK_PTR_SIZE; + + /* fill the dma addresses of sg list pages to chunks: */ + chunk_idx = 0; + payload_idx = 0; + cur_chunk_buf = chunk_list->chunks[0].buf; + for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) { + payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT; + for (i = 0; i < payloads_in_sg; i++) { + cur_chunk_buf[payload_idx++] = + (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) + + (EFA_CHUNK_PAYLOAD_SIZE * i); + + if (payload_idx == EFA_PTRS_PER_CHUNK) { + chunk_idx++; + cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; + payload_idx = 0; + } + } + } + + /* map chunks to dma and fill chunks next ptrs */ + for (i = chunk_list_size - 1; i >= 0; i--) { + dma_addr = dma_map_single(&dev->pdev->dev, + chunk_list->chunks[i].buf, + chunk_list->chunks[i].length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { + ibdev_err(&dev->ibdev, + "chunk[%u] dma_map_failed\n", i); + goto chunk_list_unmap; + } + + chunk_list->chunks[i].dma_addr = dma_addr; + ibdev_dbg(&dev->ibdev, + "chunk[%u] mapped at [%pad]\n", i, &dma_addr); + + if (!i) + break; + + prev_chunk_buf = chunk_list->chunks[i - 1].buf; + + ctrl_buf = (struct efa_com_ctrl_buff_info *) + &prev_chunk_buf[EFA_PTRS_PER_CHUNK]; + ctrl_buf->length = chunk_list->chunks[i].length; + + efa_com_set_dma_addr(dma_addr, + &ctrl_buf->address.mem_addr_high, + &ctrl_buf->address.mem_addr_low); + } + + return 0; + +chunk_list_unmap: + for (; i < chunk_list_size; i++) { + dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, + chunk_list->chunks[i].length, DMA_TO_DEVICE); + } +chunk_list_dealloc: + for (i = 0; i < chunk_list_size; i++) + kfree(chunk_list->chunks[i].buf); + + kfree(chunk_list->chunks); + return -ENOMEM; +} + +static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl) +{ + struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; + int i; + + for (i = 0; i < chunk_list->size; i++) { + dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, + chunk_list->chunks[i].length, DMA_TO_DEVICE); + kfree(chunk_list->chunks[i].buf); + } + + kfree(chunk_list->chunks); +} + +/* initialize pbl continuous mode: map pbl buffer to a dma address. 
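The device reads the whole page list from this single physically contiguous buffer, so no chunk list is needed.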
*/ +static int pbl_continuous_initialize(struct efa_dev *dev, + struct pbl_context *pbl) +{ + dma_addr_t dma_addr; + + dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf, + pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { + ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n"); + return -ENOMEM; + } + + pbl->phys.continuous.dma_addr = dma_addr; + ibdev_dbg(&dev->ibdev, + "pbl continuous - dma_addr = %pad, size[%u]\n", + &dma_addr, pbl->pbl_buf_size_in_bytes); + + return 0; +} + +/* + * initialize pbl indirect mode: + * create a chunk list out of the dma addresses of the physical pages of + * pbl buffer. + */ +static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl) +{ + u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE); + struct scatterlist *sgl; + int sg_dma_cnt, err; + + BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE); + sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages); + if (!sgl) + return -ENOMEM; + + sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); + if (!sg_dma_cnt) { + err = -EINVAL; + goto err_map; + } + + pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages; + pbl->phys.indirect.sgl = sgl; + pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt; + err = pbl_chunk_list_create(dev, pbl); + if (err) { + ibdev_dbg(&dev->ibdev, + "chunk_list creation failed[%d]\n", err); + goto err_chunk; + } + + ibdev_dbg(&dev->ibdev, + "pbl indirect - size[%u], chunks[%u]\n", + pbl->pbl_buf_size_in_bytes, + pbl->phys.indirect.chunk_list.size); + + return 0; + +err_chunk: + dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); +err_map: + kfree(sgl); + return err; +} + +static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl) +{ + pbl_chunk_list_destroy(dev, pbl); + dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl, + pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE); + kfree(pbl->phys.indirect.sgl); +} + +/* create a page buffer list from a mapped user memory region */ +static int pbl_create(struct efa_dev *dev, + struct pbl_context *pbl, + struct ib_umem *umem, + int hp_cnt, + u8 hp_shift) +{ + int err; + + pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE; + pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes, + GFP_KERNEL | __GFP_NOWARN); + if (pbl->pbl_buf) { + pbl->physically_continuous = 1; + err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, + hp_shift); + if (err) + goto err_continuous; + err = pbl_continuous_initialize(dev, pbl); + if (err) + goto err_continuous; + } else { + pbl->physically_continuous = 0; + pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes); + if (!pbl->pbl_buf) + return -ENOMEM; + + err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, + hp_shift); + if (err) + goto err_indirect; + err = pbl_indirect_initialize(dev, pbl); + if (err) + goto err_indirect; + } + + ibdev_dbg(&dev->ibdev, + "user_pbl_created: user_pages[%u], continuous[%u]\n", + hp_cnt, pbl->physically_continuous); + + return 0; + +err_continuous: + kfree(pbl->pbl_buf); + return err; +err_indirect: + vfree(pbl->pbl_buf); + return err; +} + +static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl) +{ + if (pbl->physically_continuous) { + dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr, + pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); + kfree(pbl->pbl_buf); + } else { + pbl_indirect_terminate(dev, pbl); + vfree(pbl->pbl_buf); + } +} + +static int efa_create_inline_pbl(struct efa_dev 
*dev, struct efa_mr *mr, + struct efa_com_reg_mr_params *params) +{ + int err; + + params->inline_pbl = 1; + err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array, + params->page_num, params->page_shift); + if (err) + return err; + + ibdev_dbg(&dev->ibdev, + "inline_pbl_array - pages[%u]\n", params->page_num); + + return 0; +} + +static int efa_create_pbl(struct efa_dev *dev, + struct pbl_context *pbl, + struct efa_mr *mr, + struct efa_com_reg_mr_params *params) +{ + int err; + + err = pbl_create(dev, pbl, mr->umem, params->page_num, + params->page_shift); + if (err) { + ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err); + return err; + } + + params->inline_pbl = 0; + params->indirect = !pbl->physically_continuous; + if (pbl->physically_continuous) { + params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes; + + efa_com_set_dma_addr(pbl->phys.continuous.dma_addr, + ¶ms->pbl.pbl.address.mem_addr_high, + ¶ms->pbl.pbl.address.mem_addr_low); + } else { + params->pbl.pbl.length = + pbl->phys.indirect.chunk_list.chunks[0].length; + + efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr, + ¶ms->pbl.pbl.address.mem_addr_high, + ¶ms->pbl.pbl.address.mem_addr_low); + } + + return 0; +} + +static void efa_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, u8 *shift, u32 *ncont) +{ + struct scatterlist *sg; + u64 base = ~0, p = 0; + unsigned long tmp; + unsigned long m; + u64 len, pfn; + int i = 0; + int entry; + + addr = addr >> PAGE_SHIFT; + tmp = (unsigned long)addr; + m = find_first_bit(&tmp, BITS_PER_LONG); + if (max_page_shift) + m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + if (base + p != pfn) { + /* + * If either the offset or the new + * base are unaligned update m + */ + tmp = (unsigned long)(pfn | p); + if (!IS_ALIGNED(tmp, 1 << m)) + m = find_first_bit(&tmp, BITS_PER_LONG); + + base = pfn; + p = 0; + } + + p += len; + i += len; + } + + if (i) { + m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); + *ncont = DIV_ROUND_UP(i, (1 << m)); + } else { + m = 0; + *ncont = 0; + } + + *shift = PAGE_SHIFT + m; + *count = i; +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + unsigned long max_page_shift; + struct pbl_context pbl; + struct efa_mr *mr; + int inline_size; + int npages; + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + err = -EINVAL; + goto err_out; + } + + if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + ibdev_dbg(&dev->ibdev, + "Unsupported access flags[%#x], supported[%#x]\n", + access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + err = -EOPNOTSUPP; + goto err_out; + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto err_out; + } + + mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + params.pd = to_epd(ibpd)->pdn; + params.iova = virt_addr; + params.mr_length_in_bytes = length; + params.permissions = 
access_flags & 0x1; + max_page_shift = fls64(dev->dev_attr.page_size_cap); + + efa_cont_pages(mr->umem, start, max_page_shift, &npages, + ¶ms.page_shift, ¶ms.page_num); + ibdev_dbg(&dev->ibdev, + "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n", + start, length, npages, params.page_shift, params.page_num); + + inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); + if (params.page_num <= inline_size) { + err = efa_create_inline_pbl(dev, mr, ¶ms); + if (err) + goto err_unmap; + + err = efa_com_register_mr(&dev->edev, ¶ms, &result); + if (err) + goto err_unmap; + } else { + err = efa_create_pbl(dev, &pbl, mr, ¶ms); + if (err) + goto err_unmap; + + err = efa_com_register_mr(&dev->edev, ¶ms, &result); + pbl_destroy(dev, &pbl); + + if (err) + goto err_unmap; + } + + mr->ibmr.lkey = result.l_key; + mr->ibmr.rkey = result.r_key; + mr->ibmr.length = length; + ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + + return &mr->ibmr; + +err_unmap: + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + return ERR_PTR(err); +} + +int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibmr->device); + struct efa_com_dereg_mr_params params; + struct efa_mr *mr = to_emr(ibmr); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); + + if (mr->umem) { + params.l_key = mr->ibmr.lkey; + err = efa_com_dereg_mr(&dev->edev, ¶ms); + if (err) + return err; + ib_umem_release(mr->umem); + } + + kfree(mr); + + return 0; +} + +int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ib_query_port(ibdev, port_num, &attr); + if (err) { + ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err); + return err; + } + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + +static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) +{ + struct efa_com_dealloc_uar_params params = { + .uarn = uarn, + }; + + return efa_com_dealloc_uar(&dev->edev, ¶ms); +} + +int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_com_alloc_uar_result result; + int err; + + /* + * it's fine if the driver does not know all request fields, + * we will ack input fields in our response. 
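+ * The cmds_supp_udata_mask reported below tells user space which verbs
+ * return additional udata (currently query_device and create_ah).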
+ */ + + err = efa_com_alloc_uar(&dev->edev, &result); + if (err) + goto err_out; + + ucontext->uarn = result.uarn; + xa_init(&ucontext->mmap_xa); + + resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; + resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; + resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; + resp.inline_buf_size = dev->dev_attr.inline_buf_size; + resp.max_llq_size = dev->dev_attr.max_llq_size; + + if (udata && udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; + } + + return 0; + +err_dealloc_uar: + efa_dealloc_uar(dev, result.uarn); +err_out: + atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + return err; +} + +void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + + mmap_entries_remove_free(dev, ucontext); + efa_dealloc_uar(dev, ucontext->uarn); +} + +static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, + struct vm_area_struct *vma, u64 key, u64 length) +{ + struct efa_mmap_entry *entry; + unsigned long va; + u64 pfn; + int err; + + entry = mmap_entry_get(dev, ucontext, key, length); + if (!entry) { + ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", + key); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, + "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", + entry->address, length, entry->mmap_flag); + + pfn = entry->address >> PAGE_SHIFT; + switch (entry->mmap_flag) { + case EFA_MMAP_IO_NC: + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + pgprot_noncached(vma->vm_page_prot)); + break; + case EFA_MMAP_IO_WC: + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot)); + break; + case EFA_MMAP_DMA_PAGE: + for (va = vma->vm_start; va < vma->vm_end; + va += PAGE_SIZE, pfn++) { + err = vm_insert_page(vma, va, pfn_to_page(pfn)); + if (err) + break; + } + break; + default: + err = -EINVAL; + } + + if (err) + ibdev_dbg( + &dev->ibdev, + "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", + entry->address, length, entry->mmap_flag, err); + + return err; +} + +int efa_mmap(struct ib_ucontext *ibucontext, + struct vm_area_struct *vma) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + u64 length = vma->vm_end - vma->vm_start; + u64 key = vma->vm_pgoff << PAGE_SHIFT; + + ibdev_dbg(&dev->ibdev, + "start %#lx, end %#lx, length = %#llx, key = %#llx\n", + vma->vm_start, vma->vm_end, length, key); + + if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { + ibdev_dbg(&dev->ibdev, + "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", + length, PAGE_SIZE, vma->vm_flags); + return -EINVAL; + } + + if (vma->vm_flags & VM_EXEC) { + ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); + return -EPERM; + } + vma->vm_flags &= ~VM_MAYEXEC; + + return __efa_mmap(dev, ucontext, vma, key, length); +} + +static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) +{ + struct efa_com_destroy_ah_params params = { + .ah = ah->ah, + .pdn = to_epd(ah->ibah.pd)->pdn, + }; + + return efa_com_destroy_ah(&dev->edev, &params); +} + +int efa_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibah->device); + struct efa_com_create_ah_params 
params = {}; + struct efa_ibv_create_ah_resp resp = {}; + struct efa_com_create_ah_result result; + struct efa_ah *ah = to_eah(ibah); + int err; + + if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + ibdev_dbg(&dev->ibdev, + "Create address handle is not supported in atomic context\n"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + err = -EINVAL; + goto err_out; + } + + memcpy(params.dest_addr, ah_attr->grh.dgid.raw, + sizeof(params.dest_addr)); + params.pdn = to_epd(ibah->pd)->pdn; + err = efa_com_create_ah(&dev->edev, &params, &result); + if (err) + goto err_out; + + memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id)); + ah->ah = result.ah; + + resp.efa_address_handle = result.ah; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for create_ah response\n"); + goto err_destroy_ah; + } + } + ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah); + + return 0; + +err_destroy_ah: + efa_ah_destroy(dev, ah); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_ah_err); + return err; +} + +void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct efa_dev *dev = to_edev(ibah->pd->device); + struct efa_ah *ah = to_eah(ibah); + + ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah); + + if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { + ibdev_dbg(&dev->ibdev, + "Destroy address handle is not supported in atomic context\n"); + return; + } + + efa_ah_destroy(dev, ah); +} + +enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, + u8 port_num) +{ + return IB_LINK_LAYER_UNSPECIFIED; +} + diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index addefae16c9c..310105d4e3de 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -4104,6 +4104,9 @@ def_access_ibp_counter(seq_naks); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), +[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), +[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, CNTR_NORMAL), [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT, @@ -13294,15 +13297,18 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); - * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 2. FECN (num_kernel_context - 1 + num_user_contexts + + * num_vnic_contexts); * 3. VNIC (num_vnic_contexts). - * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * It should be noted that FECN oversubscribe num_vnic_contexts * entries of RMT because both VNIC and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. 
*/ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (HFI1_CAP_IS_KSET(TID_RDMA)) + rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, @@ -14285,37 +14291,43 @@ bail: init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); } -static void init_user_fecn_handling(struct hfi1_devdata *dd, - struct rsm_map_table *rmt) +static void init_fecn_handling(struct hfi1_devdata *dd, + struct rsm_map_table *rmt) { struct rsm_rule_data rrd; u64 reg; - int i, idx, regoff, regidx; + int i, idx, regoff, regidx, start; u8 offset; u32 total_cnt; + if (HFI1_CAP_IS_KSET(TID_RDMA)) + /* Exclude context 0 */ + start = 1; + else + start = dd->first_dyn_alloc_ctxt; + + total_cnt = dd->num_rcv_contexts - start; + /* there needs to be enough room in the map table */ - total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { - dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); + dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n"); return; } /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). + * in the range start...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. * There are only enough bits in offset for the table size, so * start with that to allow for a "negative" offset. */ - offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_dyn_alloc_ctxt); + offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start); - for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; - i < dd->num_rcv_contexts; i++, idx++) { + for (i = start, idx = rmt->used; i < dd->num_rcv_contexts; + i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; regidx = idx / 8; @@ -14437,7 +14449,7 @@ static void init_rxe(struct hfi1_devdata *dd) rmt = alloc_rsm_map_table(dd); /* set up QOS, including the QPN map table */ init_qos(dd, rmt); - init_user_fecn_handling(dd, rmt); + init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); /* record number of used rsm map entries for vnic */ dd->vnic.rmt_start = rmt->used; @@ -14663,8 +14675,8 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) */ static int init_asic_data(struct hfi1_devdata *dd) { - unsigned long flags; - struct hfi1_devdata *tmp, *peer = NULL; + unsigned long index; + struct hfi1_devdata *peer; struct hfi1_asic_data *asic_data; int ret = 0; @@ -14673,14 +14685,12 @@ static int init_asic_data(struct hfi1_devdata *dd) if (!asic_data) return -ENOMEM; - spin_lock_irqsave(&hfi1_devs_lock, flags); + xa_lock_irq(&hfi1_dev_table); /* Find our peer device */ - list_for_each_entry(tmp, &hfi1_dev_list, list) { - if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) && - dd->unit != tmp->unit) { - peer = tmp; + xa_for_each(&hfi1_dev_table, index, peer) { + if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) && + dd->unit != peer->unit) break; - } } if (peer) { @@ -14692,7 +14702,7 @@ static int init_asic_data(struct hfi1_devdata *dd) mutex_init(&dd->asic_data->asic_resource_mutex); } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irq(&hfi1_dev_table); /* first one 
through - set up i2c devices */ if (!peer) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6c27c1c6a868..4e6c3556ec48 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -858,6 +858,9 @@ static inline int idx_from_vl(int vl) /* Per device counter indexes */ enum { C_RCV_OVF = 0, + C_RX_LEN_ERR, + C_RX_ICRC_ERR, + C_RX_EBP, C_RX_TID_FULL, C_RX_TID_INVALID, C_RX_TID_FLGMS, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c0800ea5a3f8..ab3589d17aee 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -380,6 +380,9 @@ #define DC_LCB_PRF_TX_FLIT_CNT (DC_LCB_CSRS + 0x000000000418) #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) +#define RCV_LENGTH_ERR_CNT 0 +#define RCV_ICRC_ERR_CNT 6 +#define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 #define RCV_CONTEXT_EGR_STALL 22 #define RCV_DATA_PKT_CNT 0 diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7310a5dba420..d47da7b0438f 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -286,7 +286,7 @@ struct diag_pkt { #define RHF_TID_ERR (0x1ull << 59) #define RHF_LEN_ERR (0x1ull << 60) #define RHF_ECC_ERR (0x1ull << 61) -#define RHF_VCRC_ERR (0x1ull << 62) +#define RHF_RESERVED (0x1ull << 62) #define RHF_ICRC_ERR (0x1ull << 63) #define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 427ba0ce74a5..15efb4a380b2 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1080,6 +1080,77 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp) return __qsfp_debugfs_release(in, fp, 1); } +#define EXPROM_WRITE_ENABLE BIT_ULL(14) + +static bool exprom_wp_disabled; + +static int exprom_wp_set(struct hfi1_devdata *dd, bool disable) +{ + u64 gpio_val = 0; + + if (disable) { + gpio_val = EXPROM_WRITE_ENABLE; + exprom_wp_disabled = true; + dd_dev_info(dd, "Disable Expansion ROM Write Protection\n"); + } else { + exprom_wp_disabled = false; + dd_dev_info(dd, "Enable Expansion ROM Write Protection\n"); + } + + write_csr(dd, ASIC_GPIO_OUT, gpio_val); + write_csr(dd, ASIC_GPIO_OE, gpio_val); + + return 0; +} + +static ssize_t exprom_wp_debugfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t exprom_wp_debugfs_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + char cdata; + + if (count != 1) + return -EINVAL; + if (get_user(cdata, buf)) + return -EFAULT; + if (cdata == '0') + exprom_wp_set(ppd->dd, false); + else if (cdata == '1') + exprom_wp_set(ppd->dd, true); + else + return -EINVAL; + + return 1; +} + +static unsigned long exprom_in_use; + +static int exprom_wp_debugfs_open(struct inode *in, struct file *fp) +{ + if (test_and_set_bit(0, &exprom_in_use)) + return -EBUSY; + + return 0; +} + +static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) +{ + struct hfi1_pportdata *ppd = private2ppd(fp); + + if (exprom_wp_disabled) + exprom_wp_set(ppd->dd, false); + clear_bit(0, &exprom_in_use); + + return 0; +} + #define DEBUGFS_OPS(nm, readroutine, writeroutine) \ { \ .name = nm, \ @@ -1119,6 +1190,9 @@ 
static const struct counter_info port_cntr_ops[] = { qsfp1_debugfs_open, qsfp1_debugfs_release), DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), + DEBUGFS_XOPS("exprom_wp", exprom_wp_debugfs_read, + exprom_wp_debugfs_write, exprom_wp_debugfs_open, + exprom_wp_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), @@ -1302,15 +1376,15 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v) static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..01aa1f132f55 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -72,8 +72,6 @@ */ const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(hfi1_devs_lock); -LIST_HEAD(hfi1_dev_list); DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; @@ -175,11 +173,11 @@ int hfi1_count_active_units(void) { struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -190,7 +188,7 @@ int hfi1_count_active_units(void) } } } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return nunits_active; } @@ -264,7 +262,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, hfi1_dbg_fault_suppress_err(verbs_dev)) return; - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return; if (packet->etype == RHF_RCV_TYPE_BYPASS) { @@ -516,7 +514,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, */ do_cnp = prescan || (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && - opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(ACK); /* Call appropriate CNP handler */ if (!ignore_fecn && do_cnp && fecn) @@ -1581,7 +1581,7 @@ static void show_eflags_errs(struct hfi1_packet *packet) u32 rte = rhf_rcv_type_err(packet->rhf); dd_dev_err(rcd->dd, - "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n", + "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n", rcd->ctxt, packet->rhf, packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "", packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "", @@ -1589,7 +1589,6 @@ static void show_eflags_errs(struct hfi1_packet *packet) packet->rhf & RHF_TID_ERR ? "tid " : "", packet->rhf & RHF_LEN_ERR ? "len " : "", packet->rhf & RHF_ECC_ERR ? "ecc " : "", - packet->rhf & RHF_VCRC_ERR ? 
"vcrc " : "", packet->rhf & RHF_ICRC_ERR ? "icrc " : "", rte); } diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 1be49a0d9c11..e9d5cc8b771a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - kfree(rcd->groups); rcd->groups = NULL; hfi1_exp_tid_group_init(rcd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 048b5d73ba39..b458c218842b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -54,7 +54,6 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/slab.h> -#include <linux/idr.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/completion.h> @@ -65,6 +64,7 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <linux/xarray.h> #include <rdma/ib_hdrs.h> #include <rdma/opa_addr.h> #include <linux/rhashtable.h> @@ -1021,8 +1021,8 @@ struct hfi1_asic_data { struct hfi1_vnic_data { struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; + struct xarray vesws; u8 num_vports; - struct idr vesw_idr; u8 rmt_start; u8 num_ctxt; }; @@ -1041,7 +1041,6 @@ struct sdma_vl_map; typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ - struct list_head list; /* pointers to related structs for this device */ /* pci access data structure */ struct pci_dev *pcidev; @@ -1426,8 +1425,7 @@ struct hfi1_filedata { struct mm_struct *mm; }; -extern struct list_head hfi1_dev_list; -extern spinlock_t hfi1_devs_lock; +extern struct xarray hfi1_dev_table; struct hfi1_devdata *hfi1_lookup(int unit); static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index faaaac8fbc55..71cb9525c074 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -49,7 +49,7 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/delay.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> @@ -124,7 +124,7 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); -static struct idr hfi1_unit_table; +DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -469,7 +469,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, if (rcd->egrbufs.size < hfi1_max_mtu) { rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); hfi1_cdbg(PROC, - "ctxt%u: eager bufs size too small. Adjusting to %zu\n", + "ctxt%u: eager bufs size too small. 
Adjusting to %u\n", rcd->ctxt, rcd->egrbufs.size); } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; @@ -805,7 +805,8 @@ static int create_workqueues(struct hfi1_devdata *dd) ppd->hfi1_wq = alloc_workqueue( "hfi%d_%d", - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | + WQ_MEM_RECLAIM, HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) @@ -1018,21 +1019,9 @@ done: return ret; } -static inline struct hfi1_devdata *__hfi1_lookup(int unit) -{ - return idr_find(&hfi1_unit_table, unit); -} - struct hfi1_devdata *hfi1_lookup(int unit) { - struct hfi1_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - dd = __hfi1_lookup(unit); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - return dd; + return xa_load(&hfi1_dev_table, unit); } /* @@ -1200,7 +1189,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be - * holding hfi1_devs_lock. + * holding hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { @@ -1236,13 +1225,10 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) struct hfi1_asic_data *ad; unsigned long flags; - spin_lock_irqsave(&hfi1_devs_lock, flags); - if (!list_empty(&dd->list)) { - idr_remove(&hfi1_unit_table, dd->unit); - list_del_init(&dd->list); - } + xa_lock_irqsave(&hfi1_dev_table, flags); + __xa_erase(&hfi1_dev_table, dd->unit); ad = release_asic_data(dd); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); finalize_asic_data(dd, ad); free_platform_config(dd); @@ -1286,13 +1272,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct hfi1_devdata *dd; int ret, nports; @@ -1307,21 +1290,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pport = (struct hfi1_pportdata *)(dd + 1); dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - - INIT_LIST_HEAD(&dd->list); - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&hfi1_devs_lock, flags); - - ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &hfi1_dev_list); - } dd->node = NUMA_NO_NODE; - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { dev_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1522,8 +1494,6 @@ static int __init hfi1_mod_init(void) * These must be called before the driver is registered with * the PCI subsystem. 
*/ - idr_init(&hfi1_unit_table); - hfi1_dbg_init(); ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { @@ -1534,7 +1504,6 @@ static int __init hfi1_mod_init(void) bail_dev: hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); dev_cleanup(); bail: return ret; @@ -1552,7 +1521,7 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy_all(); hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); + WARN_ON(!xa_empty(&hfi1_dev_table)); dispose_firmware(); /* asymmetric with obtain_firmware() */ dev_cleanup(); } @@ -2071,7 +2040,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.size = alloced_bytes; hfi1_cdbg(PROC, - "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", + "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n", rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 5f2011cabc25..62f93c1dc082 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -47,12 +47,14 @@ * for future transactions */ +#include <linux/workqueue.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdmavt_qp.h> + /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 #define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX -struct ib_atomic_eth; - enum hfi1_opfn_codes { STL_VERBS_EXTD_NONE = 0, STL_VERBS_EXTD_TID_RDMA, diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index eba300330a02..4e0e9fc0a777 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -742,6 +742,8 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) iowait_wakeup, iowait_sdma_drained, hfi1_init_priority); + /* Init to a value to start the running average correctly */ + priv->s_running_pkt_size = piothreshold / 2; return priv; } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5991211d72bd..a922edcf23d6 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* FALLTHROUGH */ case OP(ATOMIC_ACKNOWLEDGE): /* @@ -343,7 +340,8 @@ write_resp: break; e->sent = 1; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + /* Do not free e->rdma_sge until all data are received */ + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); break; case TID_OP(READ_RESP): @@ -1836,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], @@ -1884,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; trdma_clean_swqe(qp, wqe); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); @@ -2643,10 +2641,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = 
get_ib_reth_vaddr(reth); @@ -3088,10 +3083,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); reth = &ohdr->u.rc.reth; len = be32_to_cpu(reth->length); if (len) { @@ -3166,10 +3158,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* Process OPFN special virtual address */ if (opfn) { opfn_conn_response(qp, e, ateth); diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h index 8e0935b9bf2a..5ed5e85d5841 100644 --- a/drivers/infiniband/hw/hfi1/rc.h +++ b/drivers/infiniband/hw/hfi1/rc.h @@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, return rvt_restart_sge(ss, wqe, len); } +static inline void release_rdma_sge_mr(struct rvt_ack_entry *e) +{ + if (e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } +} + struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev, u8 *prev_ack, bool *scheduled); int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 124a3ec1e15c..23ac6057b211 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -524,7 +524,7 @@ void _hfi1_do_send(struct work_struct *work) /** * hfi1_do_send - perform a send on a QP - * @work: contains a pointer to the QP + * @qp: a pointer to the QP * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 43cbce7a19ea..6fb93032fbef 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -67,8 +67,6 @@ static u32 mask_generation(u32 a) #define TID_RDMA_DESTQP_FLOW_SHIFT 11 #define TID_RDMA_DESTQP_FLOW_MASK 0x1f -#define TID_FLOW_SW_PSN BIT(0) - #define TID_OPFN_QP_CTXT_MASK 0xff #define TID_OPFN_QP_CTXT_SHIFT 56 #define TID_OPFN_QP_KDETH_MASK 0xff @@ -128,6 +126,15 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn); +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -776,7 +783,6 @@ int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp) rcd->flows[fs->index].generation = fs->generation; fs->generation = kern_setup_hw_flow(rcd, fs->index); fs->psn = 0; - fs->flags = 0; dequeue_tid_waiter(rcd, &rcd->flow_queue, qp); /* get head before dropping lock */ fqp = first_qp(rcd, &rcd->flow_queue); @@ -1807,6 +1813,7 @@ sync_check: goto done; hfi1_kern_clear_hw_flow(req->rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; req->state = TID_REQUEST_ACTIVE; } @@ -2036,10 +2043,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, if (psn != e->psn || len != req->total_len) goto unlock; - if (e->rdma_sge.mr) { - 
rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); vaddr = get_ib_reth_vaddr(reth); @@ -2238,7 +2242,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) struct ib_reth *reth; struct hfi1_qp_priv *qpriv = qp->priv; u32 bth0, psn, len, rkey; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -2248,7 +2252,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_read_req(qp, psn); @@ -2267,9 +2271,8 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* We've verified the request, insert it into the ack queue. */ @@ -2285,10 +2288,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); qp->r_len = len; @@ -2324,11 +2324,11 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) /* Schedule the send tasklet. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -2345,8 +2345,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -2463,12 +2461,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 opcode, aeth; - bool is_fecn; + bool fecn; unsigned long flags; u32 kpsn, ipsn; trace_hfi1_sender_rcv_tid_read_resp(qp); - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); kpsn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -2481,8 +2479,43 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ - if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) + if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) + goto ack_done; + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. 
+ */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto ack_op_err; + len = restart_sge(&ss, req->e.swqe, ipsn, pmtu); + if (unlikely(len < pmtu)) + goto ack_op_err; + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->s_flags |= HFI1_R_TID_SW_PSN; + } + goto ack_done; + } + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); req->ack_pending--; priv->pending_tid_r_segs--; qp->s_num_rd_atomic--; @@ -2524,6 +2557,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) req->comp_seg == req->cur_seg) || priv->tid_r_comp == priv->tid_r_reqs) { hfi1_kern_clear_hw_flow(priv->rcd, qp); + priv->s_flags &= ~HFI1_R_TID_SW_PSN; if (req->state == TID_REQUEST_SYNC) req->state = TID_REQUEST_ACTIVE; } @@ -2545,8 +2579,6 @@ ack_op_err: ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) @@ -2773,9 +2805,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return ret; } - if (priv->flow_state.flags & TID_FLOW_SW_PSN) { + if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, - priv->flow_state.r_next_psn); + flow->flow_state.r_next_psn); if (diff > 0) { if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) restart_tid_rdma_read_req(rcd, @@ -2811,22 +2843,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, qp->r_flags &= ~RVT_R_RDMAR_SEQ; } - priv->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); } else { - u64 reg; u32 last_psn; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - last_psn = mask_psn(reg); - - priv->flow_state.r_next_psn = last_psn; - priv->flow_state.flags |= TID_FLOW_SW_PSN; + last_psn = read_r_next_psn(dd, rcd->ctxt, + flow->idx); + flow->flow_state.r_next_psn = last_psn; + priv->s_flags |= HFI1_R_TID_SW_PSN; /* * If no request has been restarted yet, * restart the current one. @@ -2891,10 +2916,11 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, struct rvt_ack_entry *e; struct tid_rdma_request *req; struct tid_rdma_flow *flow; + int diff = 0; trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ", packet->rhf); - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return ret; packet->ohdr = &hdr->u.oth; @@ -2974,17 +3000,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, switch (rte) { case RHF_RTE_EXPECTED_FLOW_SEQ_ERR: if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) { - u64 reg; - qpriv->s_flags |= HFI1_R_TID_SW_PSN; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - flow->flow_state.r_next_psn = mask_psn(reg); + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, + flow->idx); qpriv->r_next_psn_kdeth = flow->flow_state.r_next_psn; goto nak_psn; @@ -2997,10 +3016,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, * mismatch could be due to packets that were * already in flight. 
*/ - if (psn != flow->flow_state.r_next_psn) { - psn = flow->flow_state.r_next_psn; + diff = cmp_psn(psn, + flow->flow_state.r_next_psn); + if (diff > 0) goto nak_psn; - } + else if (diff < 0) + break; qpriv->s_nak_state = 0; /* @@ -3011,8 +3032,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (psn == full_flow_psn(flow, flow->flow_state.lpsn)) ret = false; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); qpriv->r_next_psn_kdeth = - ++flow->flow_state.r_next_psn; + flow->flow_state.r_next_psn; } break; @@ -3517,8 +3540,10 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->r_tid_alloc == qpriv->r_tid_head) { /* If all data has been received, clear the flow */ if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS && - !qpriv->alloc_w_segs) + !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + } break; } @@ -3544,8 +3569,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->sync_pt && !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); qpriv->sync_pt = false; - if (qpriv->s_flags & HFI1_R_TID_SW_PSN) - qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; } /* Allocate flow if we don't have one */ @@ -3687,7 +3711,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) struct hfi1_qp_priv *qpriv = qp->priv; struct tid_rdma_request *req; u32 bth0, psn, len, rkey, num_segs; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -3696,7 +3720,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_write_req(qp, psn); @@ -3713,9 +3737,8 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len); diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* @@ -3751,10 +3774,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) goto update_head; } - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* The length needs to be in multiples of PAGE_SIZE */ if (!len || len & ~PAGE_MASK) @@ -3834,11 +3854,11 @@ update_head: /* Schedule the send tasklet. 
*/ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -3855,8 +3875,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -4073,10 +4091,10 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) struct tid_rdma_flow *flow; enum ib_wc_status status; u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4216,7 +4234,6 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) qpriv->s_tid_cur = i; } qp->s_flags &= ~HFI1_S_WAIT_TID_RESP; - hfi1_schedule_tid_send(qp); goto ack_done; @@ -4225,9 +4242,9 @@ ack_op_err: ack_err: rvt_error_qp(qp, status); ack_done: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe, @@ -4307,7 +4324,9 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) unsigned long flags; u32 psn, next; u8 opcode; + bool fecn; + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4320,9 +4339,53 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) req = ack_to_tid_req(e); flow = &req->flows[req->clear_tail]; if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(psn, flow->flow_state.r_next_psn)) goto send_nak; - flow->flow_state.r_next_psn++; + + flow->flow_state.r_next_psn = mask_psn(psn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto send_nak; + len = req->comp_seg * req->seg_len; + len += delta_psn(psn, + full_flow_psn(flow, flow->flow_state.spsn)) * + pmtu; + if (unlikely(req->total_len - len < pmtu)) + goto send_nak; + + /* + * The e->rdma_sge field is set when TID RDMA WRITE REQ + * is first received and is never modified thereafter. 
+ */ + ss.sge = e->rdma_sge; + ss.sg_list = NULL; + ss.num_sge = 1; + ss.total_len = req->total_len; + rvt_skip_sge(&ss, len, false); + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->r_next_psn_kdeth = mask_psn(psn + 1); + priv->s_flags |= HFI1_R_TID_SW_PSN; + } goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); @@ -4347,6 +4410,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) priv->r_tid_ack = priv->r_tid_tail; if (opcode == TID_OP(WRITE_DATA_LAST)) { + release_rdma_sge_mr(e); for (next = priv->r_tid_tail + 1; ; next++) { if (next > rvt_size_atomic(&dev->rdi)) next = 0; @@ -4386,6 +4450,8 @@ done: hfi1_schedule_tid_send(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); return; @@ -4487,12 +4553,11 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; - bool is_fecn; unsigned long flags; u16 fidx; trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0); - is_fecn = process_ecn(qp, packet); + process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth); req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn)); @@ -4846,10 +4911,10 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) struct tid_rdma_flow *flow; struct tid_flow_state *fs = &qpriv->flow_state; u32 psn, generation, idx, gen_next; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT; @@ -4940,6 +5005,8 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->s_flags |= RVT_S_ACK_PENDING; hfi1_schedule_tid_send(qp); bail: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -5449,3 +5516,48 @@ bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e) } return false; } + +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) +{ + u64 reg; + + /* + * The only sane way to get the amount of + * progress is to read the HW flow state. + */ + reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); + return mask_psn(reg); +} + +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn) +{ + unsigned long flags; + + tid_rdma_rcv_error(packet, ohdr, qp, psn, diff); + if (fecn) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags |= RVT_S_ECN; + spin_unlock_irqrestore(&qp->s_lock, flags); + } +} + +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn) +{ + /* + * If a start/middle packet is delivered here due to + * RSM rule and FECN, we need to update the r_next_psn. 
+ */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER && + !(priv->s_flags & HFI1_R_TID_SW_PSN)) { + struct hfi1_devdata *dd = rcd->dd; + + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, flow->idx); + } +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 53ab24ef4f02..1c536185261e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -76,10 +76,8 @@ struct tid_rdma_qp_params { struct tid_flow_state { u32 generation; u32 psn; - u32 r_next_psn; /* next PSN to be received (in TID space) */ u8 index; u8 last_index; - u8 flags; }; enum tid_rdma_req_state { diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index e62171fb7379..de7a87392b8d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -86,14 +86,14 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, * actual function to work and can not be in a macro. */ #define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ \ DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ TP_PROTO(const char *function, struct va_format *vaf), \ TP_ARGS(function, vaf)) #define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *func, char *fmt, ...)\ { \ struct va_format vaf = { \ .fmt = fmt, \ diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 548dfc45a407..4388b594ed1b 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "tid_r_comp %u pending_tid_r_segs %u " \ "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \ "s_state 0x%x hw_flow_index %u generation 0x%x " \ - "fpsn 0x%x flow_flags 0x%x" + "fpsn 0x%x" #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \ "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \ @@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "pending_tid_w_segs %u sync_pt %s " \ "ps_nak_psn 0x%x ps_nak_state 0x%x " \ "prnr_nak_state 0x%x hw_flow_index %u generation "\ - "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \ + "0x%x fpsn 0x%x resync %s" \ "r_next_psn_kdeth 0x%x" #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \ @@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) ), TP_fast_assign(/* assign */ struct hfi1_qp_priv *priv = qp->priv; @@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; ), TP_printk(/* print */ TID_READ_SENDER_PRN, @@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->s_state, __entry->hw_flow_index, __entry->generation, - __entry->fpsn, - __entry->flow_flags + __entry->fpsn ) ); @@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) __field(bool, resync) __field(u32, r_next_psn_kdeth) ), @@ -1360,7 +1356,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = 
priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; __entry->resync = priv->resync; __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth; ), @@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index, __entry->generation, __entry->fpsn, - __entry->flow_flags, __entry->resync ? "yes" : "no", __entry->r_next_psn_kdeth ) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 55a56b3d7f83..1eb4105b2d22 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1223,15 +1223,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_UD: break; case IB_QPT_UC: - case IB_QPT_RC: { + case IB_QPT_RC: + priv->s_running_pkt_size = + (tx->s_cur_size + priv->s_running_pkt_size) / 2; if (piothreshold && - tx->s_cur_size <= min(piothreshold, qp->pmtu) && + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; - } default: break; } @@ -1739,15 +1740,15 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 62ace0b2d17a..7ecb8ed4a1d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -170,6 +170,7 @@ struct hfi1_qp_priv { struct tid_flow_state flow_state; struct tid_rdma_qp_params tid_rdma; struct rvt_qp *owner; + u16 s_running_pkt_size; u8 hdr_type; /* 9B or 16B */ struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */ atomic_t n_requests; /* # of TID RDMA requests in the */ diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 2b07032dbdda..b49e60e8397d 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -162,12 +162,12 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, void hfi1_vnic_setup(struct hfi1_devdata *dd) { - idr_init(&dd->vnic.vesw_idr); + xa_init(&dd->vnic.vesws); } void hfi1_vnic_cleanup(struct hfi1_devdata *dd) { - idr_destroy(&dd->vnic.vesw_idr); + WARN_ON(!xa_empty(&dd->vnic.vesws)); } #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ @@ -533,7 +533,7 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + vinfo = xa_load(&dd->vnic.vesws, vesw_id); /* * In case of invalid vesw id, count the error on @@ -541,9 +541,10 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - int id_tmp = 0; + unsigned long index = 0; - vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, + XA_PRESENT); if (vinfo_tmp) { 
spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -597,8 +598,7 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) if (!vinfo->vesw_id) return -EINVAL; - rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, - vinfo->vesw_id + 1, GFP_NOWAIT); + rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); if (rc < 0) return rc; @@ -624,7 +624,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + xa_erase(&dd->vnic.vesws, vinfo->vesw_id); /* ensure irqs see the change */ msix_vnic_synchronize_irq(dd); diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index e2a7f1488f76..eee5205f936f 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,8 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o -hns-roce-hw-v2-objs := hns_roce_hw_v2.o +hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b3c8c45ec1e3..cdd2ac24fc2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,38 +39,34 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah; + struct hns_roce_ah *ah = to_hr_ah(ibah); u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); bool vlan_en = false; + int ret; - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + gid_attr = ah_attr->grh.sgid_attr; + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + if (ret) + return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - gid_attr = ah_attr->grh.sgid_attr; - if (is_vlan_dev(gid_attr->ndev)) { - vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + if (vlan_tag < VLAN_CFI_MASK) { vlan_en = true; - } - - if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; + } - ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | + ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << HNS_ROCE_PORT_NUM_SHIFT)); ah->av.gid_index = grh->sgid_index; @@ -86,7 +82,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) << HNS_ROCE_SL_SHIFT); - return &ah->ibah; + return 0; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ 
-111,9 +107,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) +void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_hr_ah(ah)); - - return 0; + return; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 059fd1da493e..2b6ac646ca9a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -53,6 +53,7 @@ enum { HNS_ROCE_CMD_QUERY_QPC = 0x42, HNS_ROCE_CMD_MODIFY_CQC = 0x52, + HNS_ROCE_CMD_QUERY_CQC = 0x53, /* CQC BT commands */ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10, HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f4c92a7ac1ce..8e95a1aa1b4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -57,32 +57,6 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) -/* - * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon - * SOC, check if a is less than b. - * @a: hardware index value - * @b: hardware index value - * @bits: the number of bits of a and b, range: 0~31. - * - * Hardware index increases continuously till max value, and then restart - * from zero, again and again. Because the bits of reg field is often - * limited, the reg field can only hold the low bits of the hardware index - * in hisilicon SOC. - * In some scenes we need to compare two values(a,b) getted from two reg - * fields in this driver, for example: - * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has - * incresed from 0xffff to 0x1 and a is less than b. - * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a - * is bigger than b. - * - * Return true on a less than b, otherwise false. 
- */ -#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1) -#define roce_hw_index_shift(bits) (32 - (bits)) -#define roce_hw_index_cmp_lt(a, b, bits) \ - ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \ - roce_hw_index_shift(bits)) < 0) - #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -271,8 +245,6 @@ #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -#define ROCEE_SDB_PTR_CMP_BITS 28 - #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) @@ -353,13 +325,8 @@ #define ROCEE_CAEP_AE_MASK_REG 0x6C8 #define ROCEE_CAEP_AE_ST_REG 0x6CC -#define ROCEE_SDB_ISSUE_PTR_REG 0x758 -#define ROCEE_SDB_SEND_PTR_REG 0x75C #define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850 #define ROCEE_SCAEP_WR_CQE_CNT 0x8D0 -#define ROCEE_SDB_INV_CNT_REG 0x9A4 -#define ROCEE_SDB_RETRY_CNT_REG 0x9AC -#define ROCEE_TSP_BP_ST_REG 0x9EC #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 1dfe5627006c..9caf35061721 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -32,6 +32,7 @@ #include <linux/platform_device.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_ioctl.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -127,13 +128,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_out; } - /* The cq insert radix tree */ - spin_lock_irq(&cq_table->lock); - /* Radix_tree: The associated pointer and long integer key value like */ - ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq); - spin_unlock_irq(&cq_table->lock); + ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n"); + dev_err(dev, "CQ alloc failed xa_store.\n"); goto err_put; } @@ -141,7 +138,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); - goto err_radix; + goto err_xa; } hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, @@ -152,7 +149,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); - goto err_radix; + goto err_xa; } hr_cq->cons_index = 0; @@ -164,10 +161,8 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, return 0; -err_radix: - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, hr_cq->cqn); - spin_unlock_irq(&cq_table->lock); +err_xa: + xa_erase(&cq_table->array, hr_cq->cqn); err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); @@ -197,6 +192,8 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); + xa_erase(&cq_table->array, hr_cq->cqn); + /* Waiting interrupt process procedure carried out */ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); @@ -205,10 +202,6 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) complete(&hr_cq->free); wait_for_completion(&hr_cq->free); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, hr_cq->cqn); - spin_unlock_irq(&cq_table->lock); - hns_roce_table_put(hr_dev, 
&cq_table->table, hr_cq->cqn); hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } @@ -309,7 +302,6 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); @@ -321,6 +313,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, int vector = attr->comp_vector; int cq_entries = attr->cqe; int ret; + struct hns_roce_ucontext *context = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", @@ -339,7 +333,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->ib_cq.cqe = cq_entries - 1; spin_lock_init(&hr_cq->lock); - if (context) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "Failed to copy_from_udata.\n"); ret = -EFAULT; @@ -357,8 +351,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp))) { - ret = hns_roce_db_map_user(to_hr_ucontext(context), - udata, ucmd.db_addr, + ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, &hr_cq->db); if (ret) { dev_err(dev, "cq record doorbell map failed!\n"); @@ -369,7 +362,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, } /* Get user space parameters */ - uar = &to_hr_ucontext(context)->uar; + uar = &context->uar; } else { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); @@ -408,7 +401,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, * problems if tptr is set to zero here, so we initialze it in user * space. 
*/ - if (!context && hr_cq->tptr_addr) + if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; /* Get created cq handler and carry out event */ @@ -416,7 +409,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->event = hns_roce_ib_cq_event; hr_cq->cq_depth = cq_entries; - if (context) { + if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) @@ -429,21 +422,20 @@ err_cqc: hns_roce_free_cq(hr_dev, hr_cq); err_dbmap: - if (context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp))) - hns_roce_db_unmap_user(to_hr_ucontext(context), - &hr_cq->db); + hns_roce_db_unmap_user(context, &hr_cq->db); err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (context) + if (udata) ib_umem_release(hr_cq->umem); else hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); err_db: - if (!context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) + if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_cq->db); err_cq: @@ -452,24 +444,27 @@ err_cq: } EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); int ret = 0; if (hr_dev->hw->destroy_cq) { - ret = hr_dev->hw->destroy_cq(ib_cq); + ret = hr_dev->hw->destroy_cq(ib_cq, udata); } else { hns_roce_free_cq(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ib_cq->uobject) { + if (udata) { ib_umem_release(hr_cq->umem); if (hr_cq->db_en == 1) hns_roce_db_unmap_user( - to_hr_ucontext(ib_cq->uobject->context), + rdma_udata_to_drv_context( + udata, + struct hns_roce_ucontext, + ibucontext), &hr_cq->db); } else { /* Free the buff of stored cq */ @@ -491,8 +486,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) struct device *dev = hr_dev->dev; struct hns_roce_cq *cq; - cq = radix_tree_lookup(&hr_dev->cq_table.tree, - cqn & (hr_dev->caps.num_cqs - 1)); + cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!cq) { dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); return; @@ -509,8 +503,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) struct device *dev = hr_dev->dev; struct hns_roce_cq *cq; - cq = radix_tree_lookup(&cq_table->tree, - cqn & (hr_dev->caps.num_cqs - 1)); + cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); @@ -530,8 +523,7 @@ int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - spin_lock_init(&cq_table->lock); - INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + xa_init(&cq_table->array); return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, hr_dev->caps.num_cqs - 1, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9ee86daf1700..563cf39df6d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -505,7 +505,6 @@ struct hns_roce_uar_table { struct hns_roce_qp_table { struct hns_roce_bitmap bitmap; - spinlock_t lock; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; @@ -515,8 +514,7 @@ struct 
hns_roce_qp_table { struct hns_roce_cq_table { struct hns_roce_bitmap bitmap; - spinlock_t lock; - struct radix_tree_root tree; + struct xarray array; struct hns_roce_hem_table table; }; @@ -869,6 +867,11 @@ struct hns_roce_work { int sub_type; }; +struct hns_roce_dfx_hw { + int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer); +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -907,7 +910,7 @@ struct hns_roce_hw { int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); - int (*destroy_qp)(struct ib_qp *ibqp); + int (*destroy_qp)(struct ib_qp *ibqp, struct ib_udata *udata); int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -916,8 +919,9 @@ struct hns_roce_hw { const struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); - int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); - int (*destroy_cq)(struct ib_cq *ibcq); + int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -956,7 +960,7 @@ struct hns_roce_dev { int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; struct hns_roce_caps caps; - struct radix_tree_root qp_table_tree; + struct xarray qp_table_xa; unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM]; u64 sys_image_guid; @@ -985,6 +989,7 @@ struct hns_roce_dev { const struct hns_roce_hw *hw; void *priv; struct workqueue_struct *irq_workq; + const struct hns_roce_dfx_hw *dfx; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -1046,8 +1051,7 @@ static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) static inline struct hns_roce_qp *__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn) { - return radix_tree_lookup(&hr_dev->qp_table_tree, - qpn & (hr_dev->caps.num_qps - 1)); + return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) @@ -1107,16 +1111,13 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); +void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); -int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); -void hns_roce_dealloc_pd(struct ib_pd *pd); +int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); +void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1126,10 +1127,10 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, 
u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int hns_roce_dereg_mr(struct ib_mr *ibmr); +int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, unsigned long mpt_index); @@ -1147,13 +1148,13 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); -struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +int hns_roce_create_srq(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int hns_roce_destroy_srq(struct ib_srq *ibsrq); +void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -1179,10 +1180,9 @@ int to_hr_qp_type(int qp_type); struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, @@ -1202,4 +1202,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); +int hns_roce_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c8555f7704d8..4c5d0f160c10 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -730,7 +730,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) /* Reserved cq for loop qp */ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; cq_init_attr.comp_vector = 0; - cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); + cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL); if (IS_ERR(cq)) { dev_err(dev, "Create cq for reserved loop qp failed!"); return -ENOMEM; @@ -749,7 +749,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) goto alloc_mem_failed; pd->device = ibdev; - ret = hns_roce_alloc_pd(pd, NULL, NULL); + ret = hns_roce_alloc_pd(pd, NULL); if (ret) goto alloc_pd_failed; @@ -855,17 +855,17 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) create_lp_qp_failed: for (i -= 1; i >= 0; i--) { hr_qp = free_mr->mr_free_qp[i]; - if (hns_roce_v1_destroy_qp(&hr_qp->ibqp)) + if (hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL)) dev_err(dev, "Destroy qp %d for mr free failed!\n", i); } - hns_roce_dealloc_pd(pd); + hns_roce_dealloc_pd(pd, NULL); alloc_pd_failed: kfree(pd); alloc_mem_failed: - if (hns_roce_ib_destroy_cq(cq)) + if (hns_roce_ib_destroy_cq(cq, NULL)) 
dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); return ret; @@ -888,17 +888,17 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) if (!hr_qp) continue; - ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL); if (ret) dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", i, ret); } - ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq); + ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); if (ret) dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); - hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); + hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); } static int hns_roce_db_init(struct hns_roce_dev *hr_dev) @@ -1096,7 +1096,7 @@ free_work: } static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) + struct hns_roce_mr *mr, struct ib_udata *udata) { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_mr_free_work *mr_work; @@ -1511,38 +1511,6 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) return ret; } -static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) -{ - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 1; - des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp"); - if (!des_qp->qp_wq) { - dev_err(dev, "Create destroy qp workqueue failed!\n"); - return -ENOMEM; - } - - return 0; -} - -static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 0; - flush_workqueue(des_qp->qp_wq); - destroy_workqueue(des_qp->qp_wq); -} - static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; @@ -1661,12 +1629,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) goto error_failed_tptr_init; } - ret = hns_roce_des_qp_init(hr_dev); - if (ret) { - dev_err(dev, "des qp init failed!\n"); - goto error_failed_des_qp_init; - } - ret = hns_roce_free_mr_init(hr_dev); if (ret) { dev_err(dev, "free mr init failed!\n"); @@ -1678,9 +1640,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) return 0; error_failed_free_mr_init: - hns_roce_des_qp_free(hr_dev); - -error_failed_des_qp_init: hns_roce_tptr_free(hr_dev); error_failed_tptr_init: @@ -1698,7 +1657,6 @@ static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_free_mr_free(hr_dev); - hns_roce_des_qp_free(hr_dev); hns_roce_tptr_free(hr_dev); hns_roce_bt_free(hr_dev); hns_roce_raq_free(hr_dev); @@ -3642,307 +3600,22 @@ static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); } -static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, - u32 *old_send, u32 *old_retry, - u32 *tsp_st, u32 *success_flags) -{ - __le32 *old_send_tmp, *old_retry_tmp; - u32 sdb_retry_cnt; - u32 sdb_send_ptr; - u32 cur_cnt, old_cnt; - __le32 tmp, tmp1; - u32 send_ptr; - - sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - tmp = cpu_to_le32(sdb_send_ptr); - tmp1 = cpu_to_le32(sdb_retry_cnt); - cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, 
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - - old_send_tmp = (__le32 *)old_send; - old_retry_tmp = (__le32 *)old_retry; - if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry_tmp, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) - *success_flags = 1; - } else { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { - *success_flags = 1; - } else { - send_ptr = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, - send_ptr); - } - } -} - -static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - u32 sdb_issue_ptr, - u32 *sdb_inv_cnt, - u32 *wait_stage) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_send_ptr, old_send; - __le32 sdb_issue_ptr_tmp; - __le32 sdb_send_ptr_tmp; - u32 success_flags = 0; - unsigned long end; - u32 old_retry; - u32 inv_cnt; - u32 tsp_st; - __le32 tmp; - - if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || - *wait_stage < HNS_ROCE_V1_DB_STAGE1) { - dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n", - hr_qp->qpn, *wait_stage); - return -EINVAL; - } - - /* Calculate the total timeout for the entire verification process */ - end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies; - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) { - /* Query db process status, until hw process completely */ - sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr, - ROCEE_SDB_PTR_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. 
issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - sdb_send_ptr = roce_read(hr_dev, - ROCEE_SDB_SEND_PTR_REG); - } - - sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); - sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); - if (roce_get_field(sdb_issue_ptr_tmp, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { - old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - - do { - tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - tmp = cpu_to_le32(tsp_st); - if (roce_get_bit(tmp, - ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { - *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; - return 0; - } - - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" - "issue 0x%x send 0x%x.\n", - hr_qp->qpn, - le32_to_cpu(sdb_issue_ptr_tmp), - le32_to_cpu(sdb_send_ptr_tmp)); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - - hns_roce_check_sdb_status(hr_dev, &old_send, - &old_retry, &tsp_st, - &success_flags); - } while (!success_flags); - } - - *wait_stage = HNS_ROCE_V1_DB_STAGE2; - - /* Get list pointer */ - *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n", - hr_qp->qpn, *sdb_inv_cnt); - } - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) { - /* Query db's list status, until hw reversal */ - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - while (roce_hw_index_cmp_lt(inv_cnt, - *sdb_inv_cnt + SDB_INV_CNT_OFFSET, - ROCEE_SDB_CNT_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. 
inv cnt 0x%x.\n", - hr_qp->qpn, inv_cnt); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - } - - *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; - } - - return 0; -} - -static int check_qp_reset_state(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_qp_work *qp_work_entry, - int *is_timeout) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_issue_ptr; - int ret; - - if (hr_qp->state != IB_QPS_RESET) { - /* Set qp to ERR, waiting for hw complete processing all dbs */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_ERR); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n", - hr_qp->qpn); - return ret; - } - - /* Record issued doorbell */ - sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG); - qp_work_entry->sdb_issue_ptr = sdb_issue_ptr; - qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - hr_qp->qpn); - return ret; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) { - qp_work_entry->sche_cnt = 0; - *is_timeout = 1; - return 0; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", - hr_qp->qpn); - return ret; - } - } - - return 0; -} - -static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) -{ - struct hns_roce_qp_work *qp_work_entry; - struct hns_roce_v1_priv *priv; - struct hns_roce_dev *hr_dev; - struct hns_roce_qp *hr_qp; - struct device *dev; - unsigned long qpn; - int ret; - - qp_work_entry = container_of(work, struct hns_roce_qp_work, work); - hr_dev = to_hr_dev(qp_work_entry->ib_dev); - dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - hr_qp = qp_work_entry->qp; - qpn = hr_qp->qpn; - - dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn); - - qp_work_entry->sche_cnt++; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, - qp_work_entry->sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - qpn); - return; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK && - priv->des_qp.requeue_flag) { - queue_work(priv->des_qp.qp_wq, work); - return; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn); - return; - } - - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); - - if (hr_qp->ibqp.qp_type == IB_QPT_RC) { - /* RC QP, release QPN */ - hns_roce_release_range_qp(hr_dev, qpn, 1); - kfree(hr_qp); - } else - kfree(hr_to_hr_sqp(hr_qp)); - - kfree(qp_work_entry); - - dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); -} - -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp_work qp_work_entry; - 
struct hns_roce_qp_work *qp_work; - struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - bool is_user = ibqp->uobject; - int is_timeout = 0; int ret; - ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout); - if (ret) { - dev_err(dev, "QP reset state check failed(%d)!\n", ret); + ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); + if (ret) return ret; - } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { + if (!udata) { __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -3950,18 +3623,16 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) } hns_roce_unlock_cqs(send_cq, recv_cq); - if (!is_timeout) { - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); - /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - } + /* RC QP, release QPN */ + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) + if (udata) ib_umem_release(hr_qp->umem); else { kfree(hr_qp->sq.wrid); @@ -3970,33 +3641,14 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (!is_timeout) { - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); - } else { - qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL); - if (!qp_work) - return -ENOMEM; - - INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn); - qp_work->ib_dev = &hr_dev->ib_dev; - qp_work->qp = hr_qp; - qp_work->db_wait_stage = qp_work_entry.db_wait_stage; - qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr; - qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; - qp_work->sche_cnt = qp_work_entry.sche_cnt; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - queue_work(priv->des_qp.qp_wq, &qp_work->work); - dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); - } - + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + kfree(hr_qp); + else + kfree(hr_to_hr_sqp(hr_qp)); return 0; } -static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 66440147d9eb..52307b2c7100 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -110,11 +110,6 @@ #define HNS_ROCE_V1_EXT_ODB_ALFUL \ (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) -#define HNS_ROCE_V1_DB_WAIT_OK 0 -#define HNS_ROCE_V1_DB_STAGE1 1 -#define HNS_ROCE_V1_DB_STAGE2 2 -#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000 -#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20 #define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 #define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 #define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 @@ -162,7 +157,6 @@ #define SQ_PSN_SHIFT 8 #define QKEY_VAL 0x80010000 #define SDB_INV_CNT_OFFSET 8 -#define SDB_ST_CMP_VAL 8 #define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 @@ -1068,11 +1062,6 @@ struct hns_roce_qp_work { u32 sche_cnt; }; -struct hns_roce_des_qp { - struct workqueue_struct *qp_wq; - 
int requeue_flag; -}; - struct hns_roce_mr_free_work { struct work_struct work; struct ib_device *ib_dev; @@ -1100,12 +1089,11 @@ struct hns_roce_v1_priv { struct hns_roce_raq_table raq_table; struct hns_roce_bt_table bt_table; struct hns_roce_tptr_table tptr_table; - struct hns_roce_des_qp des_qp; struct hns_roce_free_mr free_mr; }; int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp); +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1c54390e1c85..b5392cb5b20f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -37,7 +37,9 @@ #include <linux/types.h> #include <net/addrconf.h> #include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_ioctl.h> #include "hnae3.h" #include "hns_roce_common.h" @@ -1086,7 +1088,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, +static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { int retval; @@ -1559,7 +1561,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->qpc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->srqc_ba_pg_sz = 0; caps->srqc_buf_pg_sz = 0; - caps->srqc_hop_num = HNS_ROCE_HOP_NUM_0; + caps->srqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->cqc_ba_pg_sz = 0; caps->cqc_buf_pg_sz = 0; caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; @@ -2150,7 +2152,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, V2_MPT_BYTE_4_PD_S, mr->pd); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, (mr->access & IB_ACCESS_MW_BIND ? 
1 : 0)); @@ -3171,12 +3173,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); - if (attr_mask & IB_QP_QKEY) { - context->qkey_xrcd = attr->qkey; - qpc_mask->qkey_xrcd = 0; - hr_qp->qkey = attr->qkey; - } - if (hr_qp->rdb_en) { roce_set_bit(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); @@ -3388,7 +3384,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, 0); hr_qp->access_flags = attr->qp_access_flags; - hr_qp->pkey_index = attr->pkey_index; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, @@ -3512,11 +3507,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); } - if (attr_mask & IB_QP_QKEY) { - context->qkey_xrcd = attr->qkey; - qpc_mask->qkey_xrcd = 0; - } - roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, @@ -3636,13 +3626,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - roce_set_field(context->byte_80_rnr_rx_cqn, - V2_QPC_BYTE_80_MIN_RNR_TIME_M, - V2_QPC_BYTE_80_MIN_RNR_TIME_S, attr->min_rnr_timer); - roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, - V2_QPC_BYTE_80_MIN_RNR_TIME_M, - V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0); - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size] >> PAGE_ADDR_SHIFT); @@ -3670,13 +3653,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); - roce_set_field(context->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_EPSN_M, - V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn); - roce_set_field(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_EPSN_M, - V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); - roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4); roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, @@ -3715,15 +3691,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); } - if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) && - attr->max_dest_rd_atomic) { - roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, - V2_QPC_BYTE_140_RR_MAX_S, - fls(attr->max_dest_rd_atomic - 1)); - roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, - V2_QPC_BYTE_140_RR_MAX_S, 0); - } - if (attr_mask & IB_QP_DEST_QPN) { roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); @@ -3784,11 +3751,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, context->rq_rnr_timer = 0; qpc_mask->rq_rnr_timer = 0; - roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, - V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1); - roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, - V2_QPC_BYTE_152_RAQ_PSN_S, 0); - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, @@ -3886,13 +3848,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, 
V2_QPC_BYTE_240_RX_ACK_MSN_M, V2_QPC_BYTE_240_RX_ACK_MSN_S, 0); - roce_set_field(context->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RX_ACK_EPSN_M, - V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RX_ACK_EPSN_M, - V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); - roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M, V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0); @@ -3906,27 +3861,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_240_IRRL_TAIL_REAL_M, V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0); - roce_set_field(context->byte_220_retry_psn_msn, - V2_QPC_BYTE_220_RETRY_MSG_PSN_M, - V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_220_retry_psn_msn, - V2_QPC_BYTE_220_RETRY_MSG_PSN_M, - V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0); - - roce_set_field(context->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_PSN_S, attr->sq_psn >> 16); - roce_set_field(qpc_mask->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); - - roce_set_field(context->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0); - roce_set_field(qpc_mask->byte_220_retry_psn_msn, V2_QPC_BYTE_220_RETRY_MSG_MSN_M, V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0); @@ -3937,66 +3871,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M, V2_QPC_BYTE_212_CHECK_FLG_S, 0); - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt); - roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S, 0); - - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, - V2_QPC_BYTE_212_RETRY_NUM_INIT_S, attr->retry_cnt); - roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, - V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0); - - roce_set_field(context->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RNR_NUM_INIT_M, - V2_QPC_BYTE_244_RNR_NUM_INIT_S, attr->rnr_retry); - roce_set_field(qpc_mask->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RNR_NUM_INIT_M, - V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0); - - roce_set_field(context->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, - V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry); - roce_set_field(qpc_mask->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, - V2_QPC_BYTE_244_RNR_CNT_S, 0); - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, V2_QPC_BYTE_212_LSN_S, 0x100); roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, V2_QPC_BYTE_212_LSN_S, 0); - if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - roce_set_field(context->byte_28_at_fl, - V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, - attr->timeout); - roce_set_field(qpc_mask->byte_28_at_fl, - V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, - 0); - } else { - dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); - } - } - - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, - V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, - V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0); - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, V2_QPC_BYTE_196_IRRL_HEAD_S, 0); - roce_set_field(context->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, - 
V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, - V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); - if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { - roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, - V2_QPC_BYTE_208_SR_MAX_S, - fls(attr->max_rd_atomic - 1)); - roce_set_field(qpc_mask->byte_208_irrl, - V2_QPC_BYTE_208_SR_MAX_M, - V2_QPC_BYTE_208_SR_MAX_S, 0); - } return 0; } @@ -4090,7 +3972,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); const struct ib_gid_attr *gid_attr = NULL; - u8 src_mac[ETH_ALEN]; int is_roce_protocol; u16 vlan = 0xffff; u8 ib_port; @@ -4104,11 +3985,12 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); - memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + if (ret) + goto out; } - if (is_vlan_dev(gid_attr->ndev)) { + if (vlan < VLAN_CFI_MASK) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4190,9 +4072,152 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); } + if (attr_mask & IB_QP_TIMEOUT) { + if (attr->timeout < 31) { + roce_set_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + attr->timeout); + roce_set_field(qpc_mask->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + 0); + } else { + dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); + } + } + + if (attr_mask & IB_QP_RETRY_CNT) { + roce_set_field(context->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, + attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0); + + roce_set_field(context->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, + attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, 0); + } + + if (attr_mask & IB_QP_RNR_RETRY) { + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, 0); + } + + if (attr_mask & IB_QP_SQ_PSN) { + roce_set_field(context->byte_172_sq_psn, + V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0); + + roce_set_field(context->byte_196_sq_psn, + V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_196_sq_psn, + V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); + + roce_set_field(context->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0); + + 
roce_set_field(context->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, + attr->sq_psn >> 16); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); + + roce_set_field(context->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, + attr->sq_psn); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); + } + + if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) && + attr->max_dest_rd_atomic) { + roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, + fls(attr->max_dest_rd_atomic - 1)); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, 0); + } + + if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { + roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, + fls(attr->max_rd_atomic - 1)); + roce_set_field(qpc_mask->byte_208_irrl, + V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, 0); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + roce_set_field(context->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, + attr->min_rnr_timer); + roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0); + } + + /* RC&UC required attr */ + if (attr_mask & IB_QP_RQ_PSN) { + roce_set_field(context->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); + + roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1); + roce_set_field(qpc_mask->byte_152_raq, + V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, 0); + } + + if (attr_mask & IB_QP_QKEY) { + context->qkey_xrcd = attr->qkey; + qpc_mask->qkey_xrcd = 0; + hr_qp->qkey = attr->qkey; + } + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, ibqp->srq ? 1 : 0); roce_set_bit(qpc_mask->byte_108_rx_reqepsn, @@ -4421,7 +4446,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - bool is_user) + struct ib_udata *udata) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4443,7 +4468,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { + if (!udata) { __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? 
to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -4464,16 +4489,18 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) { + if (udata) { + struct hns_roce_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct hns_roce_ucontext, + ibucontext); + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->sdb); + hns_roce_db_unmap_user(context, &hr_qp->sdb); if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->rdb); + hns_roce_db_unmap_user(context, &hr_qp->rdb); ib_umem_release(hr_qp->umem); } else { kfree(hr_qp->sq.wrid); @@ -4492,13 +4519,13 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) +static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; @@ -6044,6 +6071,10 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, return ret; } +static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { + .query_cqc_info = hns_roce_v2_query_cqc_info, +}; + static const struct ib_device_ops hns_roce_v2_dev_ops = { .destroy_qp = hns_roce_v2_destroy_qp, .modify_cq = hns_roce_v2_modify_cq, @@ -6113,16 +6144,10 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; - const struct pci_device_id *id; int i; - id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev); - if (!id) { - dev_err(hr_dev->dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = &hns_roce_hw_v2; + hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; hr_dev->odb_offset = hr_dev->sdb_offset; @@ -6209,6 +6234,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + const struct pci_device_id *id; struct device *dev = &handle->pdev->dev; int ret; @@ -6219,6 +6245,10 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto reset_chk_err; } + id = pci_match_id(hns_roce_hw_v2_pci_tbl, handle->pdev); + if (!id) + return 0; + ret = __hns_roce_hw_v2_init_instance(handle); if (ret) { handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f1f1b75812f9..edfdbe2ce0db 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -719,8 +719,8 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_148_RAQ_SYNDROME_S 24 #define V2_QPC_BYTE_148_RAQ_SYNDROME_M GENMASK(31, 24) -#define V2_QPC_BYTE_152_RAQ_PSN_S 8 -#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(31, 8) +#define V2_QPC_BYTE_152_RAQ_PSN_S 0 +#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(23, 0) #define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S 24 #define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M GENMASK(31, 24) @@ -1799,6 +1799,9 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; +int 
hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer); + static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c new file mode 100644 index 000000000000..5a97b5a0b7be --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +// Copyright (c) 2019 Hisilicon Limited. + +#include "hnae3.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hw_v2.h" + +int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer) +{ + struct hns_roce_v2_cq_context *cq_context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0, + HNS_ROCE_CMD_QUERY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY cqc cmd process error\n"); + goto err_mailbox; + } + + memcpy(buffer, cq_context, sizeof(*cq_context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c929125da84b..8da5f18bf820 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -234,25 +234,6 @@ static int hns_roce_query_device(struct ib_device *ib_dev, return 0; } -static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev, - u8 port_num) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct net_device *ndev; - - if (port_num < 1 || port_num > hr_dev->caps.num_ports) - return NULL; - - rcu_read_lock(); - - ndev = hr_dev->iboe.netdevs[port_num - 1]; - if (ndev) - dev_hold(ndev); - - rcu_read_unlock(); - return ndev; -} - static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, struct ib_port_attr *props) { @@ -455,9 +436,9 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_ib_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, + .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, - .get_netdev = hns_roce_get_netdev, .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, @@ -468,6 +449,8 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext), }; @@ -489,6 +472,8 @@ static const struct ib_device_ops hns_roce_dev_frmr_ops = { static const struct ib_device_ops hns_roce_dev_srq_ops = { .create_srq = hns_roce_create_srq, .destroy_srq = hns_roce_destroy_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq), }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) @@ -497,6 +482,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) struct hns_roce_ib_iboe *iboe = NULL; struct ib_device *ib_dev = NULL; struct device *dev = hr_dev->dev; + unsigned int i; iboe = &hr_dev->iboe; spin_lock_init(&iboe->lock); @@ -562,6 +548,15 @@ static int 
hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->driver_id = RDMA_DRIVER_HNS; ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); + for (i = 0; i < hr_dev->caps.num_ports; i++) { + if (!hr_dev->iboe.netdevs[i]) + continue; + + ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i], + i + 1); + if (ret) + return ret; + } ret = ib_register_device(ib_dev, "hns_%d"); if (ret) { dev_err(dev, "ib_register_device failed!\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 08be0e4eabcd..6110ec408626 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1282,14 +1282,14 @@ free_cmd_mbox: return ret; } -int hns_roce_dereg_mr(struct ib_mr *ibmr) +int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); int ret = 0; if (hr_dev->hw->dereg_mr) { - ret = hr_dev->hw->dereg_mr(hr_dev, mr); + ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); @@ -1303,7 +1303,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr) } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b9b97c5e97e6..813401384d78 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -57,8 +57,7 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); } -int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ib_dev = ibpd->device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); @@ -72,7 +71,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return ret; } - if (context) { + if (udata) { struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { @@ -86,7 +85,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, } EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); -void hns_roce_dealloc_pd(struct ib_pd *pd) +void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 60cf9f03e941..8db2817a249e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -45,17 +45,14 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; struct device *dev = hr_dev->dev; struct hns_roce_qp *qp; - spin_lock(&qp_table->lock); - + xa_lock(&hr_dev->qp_table_xa); qp = __hns_roce_qp_lookup(hr_dev, qpn); if (qp) atomic_inc(&qp->refcount); - - spin_unlock(&qp_table->lock); + xa_unlock(&hr_dev->qp_table_xa); if (!qp) { dev_warn(dev, "Async event for bogus QP %08x\n", qpn); @@ -147,29 +144,20 @@ EXPORT_SYMBOL_GPL(to_hns_roce_state); static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + 
struct xarray *xa = &hr_dev->qp_table_xa; int ret; if (!qpn) return -EINVAL; hr_qp->qpn = qpn; - - spin_lock_irq(&qp_table->lock); - ret = radix_tree_insert(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); - spin_unlock_irq(&qp_table->lock); - if (ret) { - dev_err(hr_dev->dev, "QPC radix_tree_insert failed\n"); - goto err_put_irrl; - } - atomic_set(&hr_qp->refcount, 1); init_completion(&hr_qp->free); - return 0; - -err_put_irrl: + ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1), + hr_qp, GFP_KERNEL)); + if (ret) + dev_err(hr_dev->dev, "QPC xa_store failed\n"); return ret; } @@ -220,17 +208,9 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, } } - spin_lock_irq(&qp_table->lock); - ret = radix_tree_insert(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); - spin_unlock_irq(&qp_table->lock); - if (ret) { - dev_err(dev, "QPC radix_tree_insert failed\n"); + ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) goto err_put_sccc; - } - - atomic_set(&hr_qp->refcount, 1); - init_completion(&hr_qp->free); return 0; @@ -255,13 +235,12 @@ err_out: void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct xarray *xa = &hr_dev->qp_table_xa; unsigned long flags; - spin_lock_irqsave(&qp_table->lock, flags); - radix_tree_delete(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1)); - spin_unlock_irqrestore(&qp_table->lock, flags); + xa_lock_irqsave(xa, flags); + __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1)); + xa_unlock_irqrestore(xa, flags); } EXPORT_SYMBOL_GPL(hns_roce_qp_remove); @@ -1154,8 +1133,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) int ret; mutex_init(&qp_table->scc_mutex); - spin_lock_init(&qp_table->lock); - INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); + xa_init(&hr_dev->qp_table_xa); /* In hw v1, a port include two SQP, six ports total 12 */ if (hr_dev->caps.max_sq_sg <= 2) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c new file mode 100644 index 000000000000..0a31d0a3d657 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +// Copyright (c) 2019 Hisilicon Limited. 
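[Annotation] The hns_roce_qp.c hunks just above swap the driver's spinlock-protected radix tree for an xarray. Below is a minimal, self-contained sketch of that pattern under simplified, illustrative types (demo_qp / demo_table are stand-ins, not hns_roce names); the locking calls mirror the ones the diff introduces.

#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/types.h>

struct demo_qp {
        u32 qpn;
        refcount_t refcount;
};

/* One xarray replaces the old INIT_RADIX_TREE() + spinlock pair. */
static DEFINE_XARRAY(demo_table);

static int demo_qp_insert(struct demo_qp *qp)
{
        /* xa_store_irq() takes the internal xa_lock with IRQs disabled,
         * so the old spin_lock_irq() + radix_tree_insert() sequence
         * collapses into one call; xa_err() turns a stored error entry
         * into a negative errno. */
        return xa_err(xa_store_irq(&demo_table, qp->qpn, qp, GFP_KERNEL));
}

static struct demo_qp *demo_qp_lookup(u32 qpn)
{
        struct demo_qp *qp;

        xa_lock(&demo_table);           /* plays the role of the old table lock */
        qp = xa_load(&demo_table, qpn);
        if (qp)
                refcount_inc(&qp->refcount);
        xa_unlock(&demo_table);

        return qp;
}

static void demo_qp_remove(struct demo_qp *qp)
{
        unsigned long flags;

        xa_lock_irqsave(&demo_table, flags);
        __xa_erase(&demo_table, qp->qpn);
        xa_unlock_irqrestore(&demo_table, flags);
}

Because the xarray carries its own lock, the driver-side qp_table lock field disappears entirely, which is why a single xa_init() is all that remains in hns_roce_init_qp_table().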
+ +#include <rdma/rdma_cm.h> +#include <rdma/restrack.h> +#include <uapi/rdma/rdma_netlink.h> +#include "hnae3.h" +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_hw_v2.h" + +static int hns_roce_fill_cq(struct sk_buff *msg, + struct hns_roce_v2_cq_context *context) +{ + if (rdma_nl_put_driver_u32(msg, "state", + roce_get_field(context->byte_4_pg_ceqn, + V2_CQC_BYTE_4_ARM_ST_M, + V2_CQC_BYTE_4_ARM_ST_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "ceqn", + roce_get_field(context->byte_4_pg_ceqn, + V2_CQC_BYTE_4_CEQN_M, + V2_CQC_BYTE_4_CEQN_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cqn", + roce_get_field(context->byte_8_cqn, + V2_CQC_BYTE_8_CQN_M, + V2_CQC_BYTE_8_CQN_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "hopnum", + roce_get_field(context->byte_16_hop_addr, + V2_CQC_BYTE_16_CQE_HOP_NUM_M, + V2_CQC_BYTE_16_CQE_HOP_NUM_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "pi", + roce_get_field(context->byte_28_cq_pi, + V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M, + V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "ci", + roce_get_field(context->byte_32_cq_ci, + V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M, + V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "coalesce", + roce_get_field(context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_MAX_CNT_M, + V2_CQC_BYTE_56_CQ_MAX_CNT_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "period", + roce_get_field(context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_PERIOD_M, + V2_CQC_BYTE_56_CQ_PERIOD_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cnt", + roce_get_field(context->byte_52_cqe_cnt, + V2_CQC_BYTE_52_CQE_CNT_M, + V2_CQC_BYTE_52_CQE_CNT_S))) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct hns_roce_v2_cq_context *context; + struct nlattr *table_attr; + int ret; + + if (!hr_dev->dfx->query_cqc_info) + return -EINVAL; + + context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); + if (ret) + goto err; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (hns_roce_fill_cq(msg, context)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + kfree(context); + + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + kfree(context); + return -EMSGSIZE; +} + +int hns_roce_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_CQ) + return hns_roce_fill_res_cq_entry(msg, res); + + return 0; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a8ee2f6da967..b3421b1f21e0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -206,13 +206,13 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, return 0; } -struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int hns_roce_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { - struct hns_roce_dev *hr_dev 
= to_hr_dev(pd->device); + struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; - struct hns_roce_srq *srq; + struct hns_roce_srq *srq = to_hr_srq(ib_srq); int srq_desc_size; int srq_buf_size; u32 page_shift; @@ -223,11 +223,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, /* Check the actual SRQ wqe and SRQ sge num */ if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); @@ -249,17 +245,13 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, if (udata) { struct hns_roce_ib_create_srq ucmd; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - ret = -EFAULT; - goto err_srq; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); - if (IS_ERR(srq->umem)) { - ret = PTR_ERR(srq->umem); - goto err_srq; - } + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); if (hr_dev->caps.srqwqe_buf_pg_sz) { npages = (ib_umem_page_count(srq->umem) + @@ -321,11 +313,9 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, } else { page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; if (hns_roce_buf_alloc(hr_dev, srq_buf_size, - (1 << page_shift) * 2, - &srq->buf, page_shift)) { - ret = -ENOMEM; - goto err_srq; - } + (1 << page_shift) * 2, &srq->buf, + page_shift)) + return -ENOMEM; srq->head = 0; srq->tail = srq->max - 1; @@ -340,7 +330,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, goto err_srq_mtt; page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(pd, srq, page_shift); + ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift); if (ret) { dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); @@ -372,7 +362,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0, + ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, &srq->mtt, 0, srq); if (ret) goto err_wrid; @@ -389,7 +379,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, } } - return &srq->ibsrq; + return 0; err_srqc_alloc: hns_roce_srq_free(hr_dev, srq); @@ -418,12 +408,10 @@ err_buf: else hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); -err_srq: - kfree(srq); - return ERR_PTR(ret); + return ret; } -int hns_roce_destroy_srq(struct ib_srq *ibsrq) +void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -440,10 +428,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq) hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, &srq->buf); } - - kfree(srq); - - return 0; } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 2f2b4426ded7..8feec35f95a7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -552,7 +552,7 @@ enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, void i40iw_request_reset(struct i40iw_device *iwdev); void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev); -void i40iw_setup_cm_core(struct i40iw_device *iwdev); +int i40iw_setup_cm_core(struct i40iw_device *iwdev); void 
i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core); void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq); void i40iw_process_aeq(struct i40iw_device *); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 206cfb0016f8..8233f5a4e623 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3237,7 +3237,7 @@ void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf) * core * @iwdev: iwarp device structure */ -void i40iw_setup_cm_core(struct i40iw_device *iwdev) +int i40iw_setup_cm_core(struct i40iw_device *iwdev) { struct i40iw_cm_core *cm_core = &iwdev->cm_core; @@ -3256,9 +3256,19 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) cm_core->event_wq = alloc_ordered_workqueue("iwewq", WQ_MEM_RECLAIM); + if (!cm_core->event_wq) + goto error; cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", WQ_MEM_RECLAIM); + if (!cm_core->disconn_wq) + goto error; + + return 0; +error: + i40iw_cleanup_cm_core(&iwdev->cm_core); + + return -ENOMEM; } /** @@ -3278,8 +3288,10 @@ void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core) del_timer_sync(&cm_core->tcp_timer); spin_unlock_irqrestore(&cm_core->ht_lock, flags); - destroy_workqueue(cm_core->event_wq); - destroy_workqueue(cm_core->disconn_wq); + if (cm_core->event_wq) + destroy_workqueue(cm_core->event_wq); + if (cm_core->disconn_wq) + destroy_workqueue(cm_core->disconn_wq); } /** @@ -3478,7 +3490,8 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr, + NULL); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 68095f00d08f..10932baee279 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1641,7 +1641,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) iwdev = &hdl->device; iwdev->hdl = hdl; dev = &iwdev->sc_dev; - i40iw_setup_cm_core(iwdev); + if (i40iw_setup_cm_core(iwdev)) { + kfree(iwdev->hdl); + return -ENOMEM; + } dev->back_dev = (void *)iwdev; iwdev->ldev = &hdl->ldev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index a8352e3ca23d..5689d742bafb 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -291,18 +291,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ /** * i40iw_alloc_pd - allocate protection domain * @pd: PD pointer - * @context: user context created during alloc * @udata: user data */ -static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_alloc_pd_resp uresp; struct i40iw_sc_pd *sc_pd; - struct i40iw_ucontext *ucontext; u32 pd_id = 0; int err; @@ -318,8 +315,9 @@ static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, sc_pd = &iwpd->sc_pd; - if (context) { - ucontext = to_ucontext(context); + if (udata) { + struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct i40iw_ucontext, ibucontext); 
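[Annotation] The i40iw_alloc_pd hunk above is part of the series-wide change that drops the explicit ib_ucontext argument: verbs callbacks now receive only ib_udata, and a driver that needs its per-process state recovers it with rdma_udata_to_drv_context(). A rough sketch of the resulting alloc_pd shape, using made-up demo_* structures rather than i40iw's:

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct demo_ucontext {
        struct ib_ucontext ibucontext;
        int abi_ver;            /* per-process state kept by the driver */
};

struct demo_pd {
        struct ib_pd ibpd;
        u32 pd_id;
};

static int demo_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct demo_pd *pd = container_of(ibpd, struct demo_pd, ibpd);

        pd->pd_id = 1;          /* stand-in for a real HW PD allocation */

        /* udata != NULL now means "user-space caller"; the old
         * ib_ucontext parameter is gone from the verb signature. */
        if (udata) {
                struct demo_ucontext *uctx = rdma_udata_to_drv_context(
                        udata, struct demo_ucontext, ibucontext);
                u32 resp = pd->pd_id;

                if (uctx->abi_ver < 1)  /* e.g. an ABI-dependent decision */
                        return -EOPNOTSUPP;
                if (ib_copy_to_udata(udata, &resp, sizeof(resp)))
                        return -EFAULT;
        }

        return 0;
}

Kernel-side consumers of the same verb simply pass a NULL udata, which is also why checks that used to read "if (context)" become "if (udata)" throughout these hunks.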
dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); memset(&uresp, 0, sizeof(uresp)); uresp.pd_id = pd_id; @@ -342,8 +340,9 @@ error: /** * i40iw_dealloc_pd - deallocate pd * @ibpd: ptr of pd to be deallocated + * @udata: user data or null for kernel object */ -static void i40iw_dealloc_pd(struct ib_pd *ibpd) +static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); @@ -413,7 +412,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address */ -static int i40iw_destroy_qp(struct ib_qp *ibqp) +static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); @@ -744,8 +743,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); - i40iw_destroy_qp(&iwqp->ibqp); - /* let the completion of the qp destroy free the qp */ + i40iw_destroy_qp(&iwqp->ibqp, udata); + /* let the completion of the qp destroy free the qp */ return ERR_PTR(err_code); } } @@ -1063,8 +1062,9 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) /** * i40iw_destroy_cq - destroy cq * @ib_cq: cq pointer + * @udata: user data or NULL for kernel object */ -static int i40iw_destroy_cq(struct ib_cq *ib_cq) +static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; @@ -1089,12 +1089,10 @@ static int i40iw_destroy_cq(struct ib_cq *ib_cq) * i40iw_create_cq - create cq * @ibdev: device pointer from stack * @attr: attributes for cq - * @context: user context created during alloc * @udata: user data */ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct i40iw_device *iwdev = to_iwdev(ibdev); @@ -1144,14 +1142,14 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = I40IW_CQ_TYPE_IWARP; - if (context) { - struct i40iw_ucontext *ucontext; + if (udata) { + struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct i40iw_ucontext, ibucontext); struct i40iw_create_cq_req req; struct i40iw_cq_mr *cqmr; memset(&req, 0, sizeof(req)); iwcq->user_mode = true; - ucontext = to_ucontext(context); if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) { err_code = -EFAULT; goto cq_free_resources; @@ -1221,7 +1219,7 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, goto cq_free_resources; } - if (context) { + if (udata) { struct i40iw_create_cq_resp resp; memset(&resp, 0, sizeof(resp)); @@ -1340,53 +1338,22 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, struct i40iw_pbl *iwpbl = &iwmr->iwpbl; struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; struct i40iw_pble_info *pinfo; - struct sg_dma_page_iter sg_iter; - u64 pg_addr = 0; + struct ib_block_iter biter; u32 idx = 0; - bool first_pg = true; pinfo = (level == I40IW_LEVEL_1) ? 
NULL : palloc->level2.leaf; if (iwmr->type == IW_MEMREG_TYPE_QP) iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - for_each_sg_dma_page (region->sg_head.sgl, &sg_iter, region->nmap, 0) { - pg_addr = sg_page_iter_dma_address(&sg_iter); - if (first_pg) - *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); - else if (!(pg_addr & ~iwmr->page_msk)) - *pbl = cpu_to_le64(pg_addr); - else - continue; - - first_pg = false; + rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap, + iwmr->page_size) { + *pbl = rdma_block_iter_dma_address(&biter); pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } } /** - * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values. - * @addr: virtual address - * @iwmr: mr pointer for this memory registration - */ -static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) -{ - struct vm_area_struct *vma; - struct hstate *h; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, addr); - if (vma && is_vm_hugetlb_page(vma)) { - h = hstate_vma(vma); - if (huge_page_size(h) == 0x200000) { - iwmr->page_size = huge_page_size(h); - iwmr->page_msk = huge_page_mask(h); - } - } - up_read(¤t->mm->mmap_sem); -} - -/** * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count @@ -1601,10 +1568,10 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages + * @udata: user data or NULL for kernel objects */ -static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); @@ -1841,10 +1808,9 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, iwmr->ibmr.device = pd->device; iwmr->page_size = PAGE_SIZE; - iwmr->page_msk = PAGE_MASK; - - if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM)) - i40iw_set_hugetlb_values(start, iwmr); + if (req.reg_type == IW_MEMREG_TYPE_MEM) + iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M, + virt); region_length = region->length + (start & (iwmr->page_size - 1)); pg_shift = ffs(iwmr->page_size) - 1; @@ -2038,7 +2004,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg */ -static int i40iw_dereg_mr(struct ib_mr *ib_mr) +static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct ib_pd *ibpd = ib_mr->pd; struct i40iw_pd *iwpd = to_iwpd(ibpd); @@ -2058,9 +2024,12 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) if (iwmr->type != IW_MEMREG_TYPE_MEM) { /* region is released. only test for userness. 
*/ if (iwmr->region) { - struct i40iw_ucontext *ucontext; + struct i40iw_ucontext *ucontext = + rdma_udata_to_drv_context( + udata, + struct i40iw_ucontext, + ibucontext); - ucontext = to_ucontext(ibpd->uobject->context); i40iw_del_memlist(iwmr, ucontext); } if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP) @@ -2703,6 +2672,14 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_dma_mr = i40iw_get_dma_mr, .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, + .iw_accept = i40iw_accept, + .iw_add_ref = i40iw_add_ref, + .iw_connect = i40iw_connect, + .iw_create_listen = i40iw_create_listen, + .iw_destroy_listen = i40iw_destroy_listen, + .iw_get_qp = i40iw_get_qp, + .iw_reject = i40iw_reject, + .iw_rem_ref = i40iw_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, @@ -2766,22 +2743,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; - iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); - if (!iwibdev->ibdev.iwcm) { - ib_dealloc_device(&iwibdev->ibdev); - return NULL; - } - - iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref; - iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref; - iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp; - iwibdev->ibdev.iwcm->connect = i40iw_connect; - iwibdev->ibdev.iwcm->accept = i40iw_accept; - iwibdev->ibdev.iwcm->reject = i40iw_reject; - iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen; - iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; - memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, - sizeof(iwibdev->ibdev.iwcm->ifname)); + memcpy(iwibdev->ibdev.iw_ifname, netdev->name, + sizeof(iwibdev->ibdev.iw_ifname)); ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; @@ -2812,8 +2775,6 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev) return; ib_unregister_device(&iwibdev->ibdev); - kfree(iwibdev->ibdev.iwcm); - iwibdev->ibdev.iwcm = NULL; wait_event_timeout(iwibdev->iwdev->close_wq, !atomic64_read(&iwibdev->iwdev->use_count), I40IW_EVENT_TIMEOUT); @@ -2841,8 +2802,6 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return 0; error: - kfree(iwdev->iwibdev->ibdev.iwcm); - iwdev->iwibdev->ibdev.iwcm = NULL; ib_dealloc_device(&iwdev->iwibdev->ibdev); return ret; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 76cf173377ab..3a413752ccc3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -94,8 +94,7 @@ struct i40iw_mr { struct ib_umem *region; u16 type; u32 page_cnt; - u32 page_size; - u64 page_msk; + u64 page_size; u32 npages; u32 stag; u64 length; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 1672808262ba..02a169f8027b 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -40,13 +40,12 @@ #include "mlx4_ib.h" -static struct ib_ah *create_ib_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static void create_ib_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; + struct mlx4_ib_ah *ah = to_mah(ib_ah); + struct mlx4_dev *dev = to_mdev(ib_ah->device)->dev; - ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | + ah->av.ib.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 
24)); ah->av.ib.g_slid = rdma_ah_get_path_bits(ah_attr); ah->av.ib.sl_tclass_flowlabel = @@ -73,15 +72,12 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, --static_rate; ah->av.ib.stat_rate = static_rate; } - - return &ah->ibah; } -static struct ib_ah *create_iboe_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { - struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_ib_dev *ibdev = to_mdev(ib_ah->device); + struct mlx4_ib_ah *ah = to_mah(ib_ah); const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; @@ -103,12 +99,14 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, */ gid_attr = ah_attr->grh.sgid_attr; if (gid_attr) { - if (is_vlan_dev(gid_attr->ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); - memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, + &ah->av.eth.s_mac[0]); + if (ret) + return ret; + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); if (ret < 0) - return ERR_PTR(ret); + return ret; ah->av.eth.gid_index = ret; } else { /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ @@ -117,7 +115,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; - ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | + ah->av.eth.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; @@ -140,63 +138,45 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, memcpy(ah->av.eth.dgid, grh->dgid.raw, 16); ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(rdma_ah_get_sl(ah_attr) << 29); - return &ah->ibah; + return 0; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx4_ib_ah *ah; - struct ib_ah *ret; - - ah = kzalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { - ret = ERR_PTR(-EINVAL); - } else { - /* - * TBD: need to handle the case when we get - * called in an atomic context and there we - * might sleep. We don't expect this - * currently since we're working with link - * local addresses which we can translate - * without going to sleep. - */ - ret = create_iboe_ah(pd, ah_attr, ah); - } - - if (IS_ERR(ret)) - kfree(ah); + if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + return create_iboe_ah(ib_ah, ah_attr); + } - return ret; - } else - return create_ib_ah(pd, ah_attr, ah); /* never fails */ + create_ib_ah(ib_ah, ah_attr); + return 0; } -/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. 
*/ -struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - int slave_sgid_index, u8 *s_mac, - u16 vlan_tag) +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; - struct mlx4_ib_ah *mah; - struct ib_ah *ah; + struct mlx4_ib_ah *mah = to_mah(ah); + int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); - if (IS_ERR(ah)) - return ah; + ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + if (ret) + return ret; - ah->device = pd->device; - ah->pd = pd; ah->type = ah_attr->type; - mah = to_mah(ah); /* get rid of force-loopback bit */ mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); @@ -208,7 +188,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; mah->av.eth.vlan = cpu_to_be16(vlan_tag); - return ah; + return 0; } int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -250,8 +230,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 8c79a480f2b7..ecd6cadd529a 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -168,20 +168,17 @@ static void id_map_ent_timeout(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout); - struct id_map_entry *db_ent, *found_ent; + struct id_map_entry *found_ent; struct mlx4_ib_dev *dev = ent->dev; struct mlx4_ib_sriov *sriov = &dev->sriov; struct rb_root *sl_id_map = &sriov->sl_id_map; - int pv_id = (int) ent->pv_cm_id; spin_lock(&sriov->id_map_lock); - db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id); - if (!db_ent) + if (!xa_erase(&sriov->pv_id_table, ent->pv_cm_id)) goto out; found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, pv_id); out: list_del(&ent->list); @@ -196,13 +193,12 @@ static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) struct id_map_entry *ent, *found_ent; spin_lock(&sriov->id_map_lock); - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id); + ent = xa_erase(&sriov->pv_id_table, pv_cm_id); if (!ent) goto out; found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, pv_cm_id); out: spin_unlock(&sriov->id_map_lock); } @@ -256,25 +252,19 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ent->dev = to_mdev(ibdev); INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); - idr_preload(GFP_KERNEL); - spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - - ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT); + ret = xa_alloc_cyclic(&sriov->pv_id_table, &ent->pv_cm_id, ent, + xa_limit_32b, &sriov->pv_id_next, GFP_KERNEL); if (ret >= 0) { - ent->pv_cm_id = (u32)ret; + spin_lock(&sriov->id_map_lock); sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); - } - - spin_unlock(&sriov->id_map_lock); - 
idr_preload_end(); - - if (ret >= 0) + spin_unlock(&sriov->id_map_lock); return ent; + } /*error flow*/ kfree(ent); - mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); + mlx4_ib_warn(ibdev, "Allocation failed (err:0x%x)\n", ret); return ERR_PTR(-ENOMEM); } @@ -290,7 +280,7 @@ id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id) if (ent) *pv_cm_id = (int) ent->pv_cm_id; } else - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id); + ent = xa_load(&sriov->pv_id_table, *pv_cm_id); spin_unlock(&sriov->id_map_lock); return ent; @@ -407,7 +397,7 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.id_map_lock); INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; - idr_init(&dev->sriov.pv_id_table); + xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC); } /* slave = -1 ==> all slaves */ @@ -444,7 +434,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) struct id_map_entry, node); rb_erase(&ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id); + xa_erase(&sriov->pv_id_table, ent->pv_cm_id); } list_splice_init(&dev->sriov.cm_list, &lh); } else { @@ -460,7 +450,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) /* remove those nodes from databases */ list_for_each_entry_safe(map, tmp_map, &lh, list) { rb_erase(&map->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id); + xa_erase(&sriov->pv_id_table, map->pv_cm_id); } /* add remaining nodes from cm_list */ diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 03ac72339dd2..022a0b4ea452 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -38,6 +38,7 @@ #include "mlx4_ib.h" #include <rdma/mlx4-abi.h> +#include <rdma/uverbs_ioctl.h> static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { @@ -173,7 +174,6 @@ err_buf: #define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -183,6 +183,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, struct mlx4_uar *uar; void *buf_addr; int err; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); @@ -204,7 +206,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); - if (context) { + if (udata) { struct mlx4_ib_create_cq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -218,12 +220,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (err) goto err_cq; - err = mlx4_ib_db_map_user(to_mucontext(context), udata, - ucmd.db_addr, &cq->db); + err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db); if (err) goto err_mtt; - uar = &to_mucontext(context)->uar; + uar = &context->uar; cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = mlx4_db_alloc(dev->dev, &cq->db, 1); @@ -248,21 +249,21 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (dev->eq_table) vector = dev->eq_table[vector % ibdev->num_comp_vectors]; - err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, + &cq->mcq, vector, 0, !!(cq->create_flags 
& IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION), - buf_addr, !!context); + buf_addr, !!udata); if (err) goto err_dbmap; - if (context) + if (udata) cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp; else cq->mcq.comp = mlx4_ib_cq_comp; cq->mcq.event = mlx4_ib_cq_event; - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; goto err_cq_free; @@ -274,19 +275,19 @@ err_cq_free: mlx4_cq_free(dev->dev, &cq->mcq); err_dbmap: - if (context) - mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); + if (udata) + mlx4_ib_db_unmap_user(context, &cq->db); err_mtt: mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); - if (context) + if (udata) ib_umem_release(cq->umem); else mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: - if (!context) + if (!udata) mlx4_db_free(dev->dev, &cq->db); err_cq: @@ -485,7 +486,7 @@ out: return err; } -int mlx4_ib_destroy_cq(struct ib_cq *cq) +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -493,8 +494,13 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq) mlx4_cq_free(dev->dev, &mcq->mcq); mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); - if (cq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &mcq->db); ib_umem_release(mcq->umem); } else { mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 3aab71b29ce8..0f390351cef0 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -31,6 +31,7 @@ */ #include <linux/slab.h> +#include <rdma/uverbs_ioctl.h> #include "mlx4_ib.h" @@ -41,12 +42,13 @@ struct mlx4_ib_user_db_page { int refcnt; }; -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; int err = 0; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); mutex_lock(&context->db_page_mutex); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 936ee1314bcd..68c951491a08 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1371,9 +1371,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_ah *ah; struct ib_qp *send_qp = NULL; unsigned wire_tx_ix = 0; - int ret = 0; u16 wire_pkey_ix; int src_qpnum; + int ret; sqp_ctx = dev->sriov.sqps[port-1]; @@ -1393,12 +1393,20 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; - /* create ah */ - ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr, - rdma_ah_retrieve_grh(attr)->sgid_index, - s_mac, vlan_id); - if (IS_ERR(ah)) + ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah); + if (!ah) return -ENOMEM; + + ah->device = sqp_ctx->pd->device; + ah->pd = sqp_ctx->pd; + + /* create ah */ + ret = mlx4_ib_create_ah_slave(ah, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); + if (ret) + goto out; + spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1410,8 +1418,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, goto out; sqp_mad = (struct mlx4_mad_snd_buf *) 
(sqp->tx_ring[wire_tx_ix].buf.addr); - if (sqp->tx_ring[wire_tx_ix].ah) - mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + kfree(sqp->tx_ring[wire_tx_ix].ah); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1457,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah, 0); + kfree(ah); return ret; } @@ -1902,8 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1931,8 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 733f7bbd5901..25d09d53b51c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1177,8 +1177,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } } -static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx4_ib_pd *pd = to_mpd(ibpd); struct ib_device *ibdev = ibpd->device; @@ -1188,20 +1187,19 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, if (err) return err; - if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); return -EFAULT; } return 0; } -static void mlx4_ib_dealloc_pd(struct ib_pd *pd) +static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; @@ -1243,7 +1241,7 @@ err1: return ERR_PTR(err); } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); @@ -2560,7 +2558,10 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 60dc1347c5ab..26897102057d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -492,10 +492,11 @@ struct mlx4_ib_sriov { struct mlx4_sriov_alias_guid alias_guid; /* CM paravirtualization fields */ - struct list_head 
cm_list; + struct xarray pv_id_table; + u32 pv_id_next; spinlock_t id_map_lock; struct rb_root sl_id_map; - struct idr pv_id_table; + struct list_head cm_list; }; struct gid_cache_context { @@ -722,8 +723,7 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -733,43 +733,38 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int mlx4_ib_dereg_mr(struct ib_mr *mr); +int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int mlx4_ib_destroy_cq(struct ib_cq *cq); +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - int slave_sgid_index, u8 *s_mac, - u16 vlan_tag); +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int mlx4_ib_destroy_srq(struct ib_srq *srq); +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr 
**bad_wr); @@ -777,7 +772,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -int mlx4_ib_destroy_qp(struct ib_qp *qp); +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx4_ib_drain_sq(struct ib_qp *qp); void mlx4_ib_drain_rq(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -912,7 +907,7 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx4_ib_destroy_wq(struct ib_wq *wq); +int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 395379a480cb..355205a28544 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -595,7 +595,7 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) } } -int mlx4_ib_dereg_mr(struct ib_mr *ibmr) +int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; @@ -655,9 +655,8 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) return 0; } -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9426936460f8..5221c0794d1d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1041,11 +1041,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; if (qp_has_rq(init_attr)) { - err = mlx4_ib_db_map_user( - context, udata, - (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr : + err = mlx4_ib_db_map_user(udata, + (src == MLX4_IB_QP_SRC) ? + ucmd.qp.db_addr : ucmd.wq.db_addr, - &qp->db); + &qp->db); if (err) goto err_mtt; } @@ -1338,7 +1338,8 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, bool is_user) + enum mlx4_ib_source_type src, + struct ib_udata *udata) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1380,7 +1381,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, list_del(&qp->qps_list); list_del(&qp->cq_send_list); list_del(&qp->cq_recv_list); - if (!is_user) { + if (!udata) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL); if (send_cq != recv_cq) @@ -1398,19 +1399,26 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); else if (src == MLX4_IB_RWQ_SRC) - mlx4_ib_release_wqn(to_mucontext( - qp->ibwq.uobject->context), qp, 1); + mlx4_ib_release_wqn( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + qp, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } mlx4_mtt_cleanup(dev->dev, &qp->mtt); - if (is_user) { + if (udata) { if (qp->rq.wqe_cnt) { - struct mlx4_ib_ucontext *mcontext = !src ? 
- to_mucontext(qp->ibqp.uobject->context) : - to_mucontext(qp->ibwq.uobject->context); + struct mlx4_ib_ucontext *mcontext = + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext); + mlx4_ib_db_unmap_user(mcontext, &qp->db); } ib_umem_release(qp->umem); @@ -1594,7 +1602,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ibqp; } -static int _mlx4_ib_destroy_qp(struct ib_qp *qp) +static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1615,7 +1623,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata); } if (is_sqp(dev, mqp)) @@ -1626,7 +1634,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -int mlx4_ib_destroy_qp(struct ib_qp *qp) +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1637,7 +1645,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) ib_destroy_qp(sqp->roce_v2_gsi); } - return _mlx4_ib_destroy_qp(qp); + return _mlx4_ib_destroy_qp(qp, udata); } static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) @@ -2240,8 +2248,10 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth) { gid_attr = attr->ah_attr.grh.sgid_attr; - vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); - memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); + err = rdma_read_gid_l2_fields(gid_attr, &vlan, + &smac[0]); + if (err) + goto out; } if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, @@ -4238,7 +4248,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -int mlx4_ib_destroy_wq(struct ib_wq *ibwq) +int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); @@ -4246,7 +4256,7 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq) if (qp->counter_index) mlx4_ib_free_qp_counter(dev, qp); - destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1); + destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata); kfree(qp); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 381cf899bcef..4bf2946b9759 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -69,14 +69,14 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) } } -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_dev *dev = to_mdev(ib_srq->device); struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx4_ib_ucontext, ibucontext); - struct mlx4_ib_srq *srq; + struct mlx4_ib_srq *srq = to_msrq(ib_srq); struct mlx4_wqe_srq_next_seg *next; struct mlx4_wqe_data_seg *scatter; u32 cqn; @@ -89,11 +89,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) - return ERR_PTR(-EINVAL); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; 
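[Annotation] The mlx4 SRQ conversion above follows the same ib_device_ops contract used by the earlier AH and PD hunks: the driver declares its object size with INIT_RDMA_OBJ_SIZE(), the core allocates the embedding structure, and the create/destroy callbacks stop kzalloc()/kfree()-ing it. A compact sketch of that contract with an invented demo_srq type (not mlx4's):

#include <rdma/ib_verbs.h>

struct demo_srq {
        struct ib_srq ibsrq;    /* must be embedded so container_of() works */
        u32 srqn;
};

static int demo_create_srq(struct ib_srq *ibsrq,
                           struct ib_srq_init_attr *attr,
                           struct ib_udata *udata)
{
        /* The core has already allocated sizeof(struct demo_srq) bytes,
         * based on the INIT_RDMA_OBJ_SIZE() entry below, and has filled
         * in ibsrq->device and ibsrq->pd; the driver only initializes
         * its private part and returns an errno instead of ERR_PTR(). */
        struct demo_srq *srq = container_of(ibsrq, struct demo_srq, ibsrq);

        if (attr->attr.max_wr == 0)
                return -EINVAL;

        srq->srqn = 0;          /* stand-in for real HW SRQ setup */
        return 0;
}

static void demo_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
        /* No kfree() here: the core frees the containing demo_srq. */
}

static const struct ib_device_ops demo_dev_ops = {
        .create_srq = demo_create_srq,
        .destroy_srq = demo_destroy_srq,

        INIT_RDMA_OBJ_SIZE(ib_srq, demo_srq, ibsrq),
};

The destroy paths shrink accordingly: since the core owns the memory, the old "kfree(srq); return 0;" tail becomes a void function, which is exactly what the hns and mlx4 destroy_srq hunks above delete.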
mutex_init(&srq->mutex); spin_lock_init(&srq->lock); @@ -111,16 +107,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (udata) { struct mlx4_ib_create_srq ucmd; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_srq; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); - if (IS_ERR(srq->umem)) { - err = PTR_ERR(srq->umem); - goto err_srq; - } + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), srq->umem->page_shift, &srq->mtt); @@ -131,14 +123,13 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; - err = mlx4_ib_db_map_user(ucontext, udata, ucmd.db_addr, - &srq->db); + err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &srq->db); if (err) goto err_mtt; } else { err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) - goto err_srq; + return err; *srq->db.db = 0; @@ -185,8 +176,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : (u16) dev->dev->caps.reserved_xrcds; - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, - srq->db.dma, &srq->msrq); + err = mlx4_srq_alloc(dev->dev, to_mpd(ib_srq->pd)->pdn, cqn, xrcdn, + &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; @@ -201,7 +192,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_wrid: if (udata) @@ -222,10 +213,7 @@ err_db: if (!udata) mlx4_db_free(dev->dev, &srq->db); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -272,7 +260,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -int mlx4_ib_destroy_srq(struct ib_srq *srq) +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -280,8 +268,13 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) mlx4_srq_free(dev->dev, &msrq->msrq); mlx4_mtt_cleanup(dev->dev, &msrq->mtt); - if (srq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { kvfree(msrq->wrid); @@ -289,10 +282,6 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); } - - kfree(msrq); - - return 0; } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 420ae0897333..80642dd359bc 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,9 +32,8 @@ #include "mlx5_ib.h" -static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, - struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) +static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, + struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; @@ -67,21 +66,19 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } - - return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, 
struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx5_ib_ah *ah; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_ah *ah = to_mah(ibah); + struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; @@ -90,21 +87,18 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, sizeof(resp.dmac); if (udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.response_length = min_resp_len; memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - return ERR_PTR(err); + return err; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - return create_ib_ah(dev, ah, ah_attr); /* never fails */ + create_ib_ah(dev, ah, ah_attr); + return 0; } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -131,8 +125,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index be95ac5aeb30..e3ec79b8f7f5 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -82,10 +82,10 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, - u64 length, u32 alignment) +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, + u64 length, u32 alignment) { - struct mlx5_core_dev *dev = memic->dev; + struct mlx5_core_dev *dev = dm->dev; u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) >> PAGE_SHIFT; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); @@ -115,17 +115,17 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, mlx5_alignment); while (page_idx < num_memic_hw_pages) { - spin_lock(&memic->memic_lock); - page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, + spin_lock(&dm->lock); + page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages, num_memic_hw_pages, page_idx, num_pages, 0); if (page_idx < num_memic_hw_pages) - bitmap_set(memic->memic_alloc_pages, + bitmap_set(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (page_idx >= num_memic_hw_pages) break; @@ -135,10 +135,10 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (ret) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (ret == -EAGAIN) { page_idx++; @@ -157,9 +157,9 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, return -ENOMEM; } -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) { - struct mlx5_core_dev *dev = memic->dev; + 
struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; @@ -177,15 +177,140 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, start_page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); } return err; } +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + unsigned long *block_map; + u64 start_idx; + int err; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + start_idx = + (addr - MLX5_CAP64_DEV_MEM( + dev, steering_sw_icm_start_address)) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + start_idx = + (addr - + MLX5_CAP64_DEV_MEM( + dev, header_modify_sw_icm_start_address)) >> + 
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} + int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) { u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 923a7b93f507..0572dcba6eae 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,9 +44,9 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); @@ -65,4 +65,8 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id); +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 obj_id); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 18704e503508..2e2e65f00257 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -679,8 +679,7 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, - struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, u32 **cqb, + struct mlx5_ib_cq *cq, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; @@ -691,6 +690,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int ncont; void *cqc; int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); ucmdlen = udata->inlen < sizeof(ucmd) ? 
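A rough userspace sketch, separate from the patch: mlx5_cmd_alloc_sw_icm() above reserves a run of device-memory blocks by searching a bitmap under dm->lock, issues the firmware command, and rolls the bits back if the command fails; mlx5_cmd_dealloc_sw_icm() clears the bits after a successful destroy. The sketch shows the same reserve/rollback pattern with a plain byte array standing in for bitmap_find_next_zero_area()/bitmap_set()/bitmap_clear(); locking and the firmware command are omitted.

#include <stdio.h>
#include <string.h>

#define MAX_BLOCKS 64

static unsigned char used[MAX_BLOCKS];      /* one byte per block, for clarity */

/* Find a run of 'count' free blocks; return the start index or -1. */
static int find_zero_area(int count)
{
        for (int start = 0; start + count <= MAX_BLOCKS; start++) {
                int i;

                for (i = 0; i < count && !used[start + i]; i++)
                        ;
                if (i == count)
                        return start;
        }
        return -1;
}

static int alloc_blocks(int count)
{
        int start = find_zero_area(count);

        if (start < 0)
                return -1;                  /* -ENOMEM in the kernel */
        memset(&used[start], 1, count);     /* bitmap_set() */
        return start;
}

static void free_blocks(int start, int count)
{
        memset(&used[start], 0, count);     /* bitmap_clear() */
}

int main(void)
{
        int a = alloc_blocks(4);
        int b = alloc_blocks(2);

        printf("a=%d b=%d\n", a, b);        /* a=0 b=4 */
        /* If the firmware command failed, the driver would roll back: */
        free_blocks(b, 2);
        return 0;
}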
(sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); @@ -715,8 +716,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, - &cq->db); + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -740,7 +740,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = to_mucontext(context)->bfregi.sys_pages[0]; + *index = context->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { int mini_cqe_format; @@ -782,23 +782,26 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; } - MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid); + MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); return 0; err_cqb: kvfree(*cqb); err_db: - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + mlx5_ib_db_unmap_user(context, &cq->db); err_umem: ib_umem_release(cq->buf.umem); return err; } -static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + mlx5_ib_db_unmap_user(context, &cq->db); ib_umem_release(cq->buf.umem); } @@ -883,7 +886,6 @@ static void notify_soft_wc_handler(struct work_struct *work) struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -923,9 +925,9 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); - if (context) { - err = create_cq_user(dev, udata, context, cq, entries, - &cqb, &cqe_size, &index, &inlen); + if (udata) { + err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, + &index, &inlen); if (err) goto err_create; } else { @@ -962,7 +964,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); cq->mcq.irqn = irqn; - if (context) + if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else cq->mcq.comp = mlx5_ib_cq_comp; @@ -970,7 +972,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->wc_list); - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; @@ -985,8 +987,8 @@ err_cmd: err_cqb: kvfree(cqb); - if (context) - destroy_cq_user(cq, context); + if (udata) + destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); @@ -996,19 +998,14 @@ err_create: return ERR_PTR(err); } - -int mlx5_ib_destroy_cq(struct ib_cq *cq) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); - struct ib_ucontext *context = NULL; - - if (cq->uobject) - context = cq->uobject->context; mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); - if (context) - destroy_cq_user(mcq, context); + if (udata) + destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e08df7914aa..169ffffcf5ed 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ 
-85,6 +85,10 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (is_user && capable(CAP_NET_RAW) && (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) cap |= MLX5_UCTX_CAP_RAW_TX; + if (is_user && capable(CAP_SYS_RAWIO) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); MLX5_SET(uctx, uctx, cap, cap); @@ -373,8 +377,10 @@ static u64 devx_get_obj_id(const void *in) return obj_id; } -static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, + struct ib_uobject *uobj, const void *in) { + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); u64 obj_id = devx_get_obj_id(in); if (!obj_id) @@ -389,7 +395,6 @@ static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) case UVERBS_OBJECT_SRQ: { struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); u16 opcode; switch (srq->common.res) { @@ -681,6 +686,7 @@ static bool devx_is_whitelist_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: return true; default: return false; @@ -718,6 +724,7 @@ static bool devx_is_general_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_VPORT_STATE: case MLX5_CMD_OP_QUERY_ADAPTER: case MLX5_CMD_OP_QUERY_ISSI: @@ -1117,7 +1124,8 @@ static void devx_cleanup_mkey(struct devx_obj *obj) } static int devx_obj_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj = uobject->object; @@ -1135,7 +1143,8 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + struct mlx5_ib_dev *dev = + mlx5_udata_to_mdev(&attrs->driver_udata); call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, devx_free_indirect_mkey); @@ -1260,7 +1269,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1302,7 +1311,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1350,7 +1359,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); @@ -1412,7 +1421,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( if (err) return err; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; fd_uobj = uverbs_attr_get_uobject(attrs, @@ 
-1599,7 +1608,8 @@ err_obj_free: } static int devx_umem_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct devx_umem *obj = uobject->object; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; @@ -1704,7 +1714,7 @@ static __poll_t devx_async_cmd_event_poll(struct file *filp, return pollflags; } -const struct file_operations devx_async_cmd_event_fops = { +static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, @@ -1900,7 +1910,7 @@ static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); - return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); + return MLX5_CAP_GEN(dev->mdev, log_max_uctx); } const struct uapi_definition mlx5_ib_devx_defs[] = { diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 798591a18484..1fc302d41a53 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -29,6 +29,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: *namespace = MLX5_FLOW_NAMESPACE_EGRESS; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; default: return -EINVAL; } @@ -75,7 +78,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_qp *qp = NULL; struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; @@ -93,6 +96,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; + /* Allow only DEVX object as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) + return -EINVAL; + if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -104,6 +111,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; + /* Allow only flow table as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && + dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; } else if (dest_qp) { struct mlx5_ib_qp *mqp; @@ -189,7 +200,8 @@ err_out: } static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; int ret; @@ -202,21 +214,67 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. 
We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); struct mlx5_ib_flow_matcher *obj; - u32 flags; int err; obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); if (!obj) return -ENOMEM; - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; obj->mask_len = uverbs_attr_get_len( attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); err = uverbs_copy_from(&obj->matcher_mask, @@ -242,19 +300,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); + err = mlx5_ib_matcher_ns(attrs, obj); if (err) goto end; - if (flags) { - err = mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); - if (err) - goto end; - } - uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -326,7 +375,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; int num_actions; @@ -353,7 +402,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( if (IS_ERR(action)) return PTR_ERR(action); - uverbs_flow_action_fill_action(action, uobj, uobj->context->device, + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; @@ -445,7 +494,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; enum mlx5_ib_uapi_flow_table_type ft_type; struct mlx5_ib_flow_action *maction; @@ -493,8 +542,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( goto free_maction; } - uverbs_flow_action_fill_action(&maction->ib_action, uobj, - uobj->context->device, + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; @@ 
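As an aside that is not part of the patch: mlx5_ib_matcher_ns() above resolves the flow-steering namespace from two optional attributes with a clear precedence — the new FT_TYPE attribute wins, the legacy FLOW_FLAGS egress bit is still honoured for old users, supplying both is rejected, and the default remains the bypass (NIC RX) namespace. A compact userspace sketch of that decision logic, using invented names (matcher_attrs, pick_namespace):

#include <stdio.h>

enum ns { NS_BYPASS, NS_EGRESS, NS_FDB };

struct matcher_attrs {
        int has_ft_type;        /* new, explicit table-type attribute present? */
        enum ns ft_type;
        int has_flags;          /* legacy flags attribute present? */
        unsigned int flags;     /* bit 0 ~ "egress" */
};

static int pick_namespace(const struct matcher_attrs *a, enum ns *out)
{
        if (a->has_ft_type && a->has_flags)
                return -22;                 /* -EINVAL: mutually exclusive */
        if (a->has_ft_type) {
                *out = a->ft_type;          /* new attribute wins */
                return 0;
        }
        if (a->has_flags && (a->flags & 1)) {
                *out = NS_EGRESS;           /* legacy egress flag */
                return 0;
        }
        *out = NS_BYPASS;                   /* default */
        return 0;
}

int main(void)
{
        struct matcher_attrs a = { .has_ft_type = 1, .ft_type = NS_FDB };
        enum ns ns;

        if (!pick_namespace(&a, &ns))
                printf("namespace=%d\n", (int)ns);  /* 2 (NS_FDB) */
        return 0;
}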
-605,6 +653,9 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( @@ -619,15 +670,9 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -static bool flow_is_supported(struct ib_device *device) -{ - return !to_mdev(device)->rep; -} - const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, &mlx5_ib_fs), diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b8639ac71336..cbcc40d776b9 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -7,69 +7,59 @@ #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile vf_rep_profile = { - STAGE_CREATE(MLX5_IB_STAGE_INIT, - mlx5_ib_stage_init_init, - mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_rep_flow_db_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_CAPS, - mlx5_ib_stage_caps_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, - mlx5_ib_stage_rep_non_default_cb, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_rep_roce_init, - mlx5_ib_stage_rep_roce_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SRQ, - mlx5_init_srq_table, - mlx5_cleanup_srq_table), - STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_BFREG, - mlx5_ib_stage_bfrag_init, - mlx5_ib_stage_bfrag_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, - NULL, - mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_IB_REG, - mlx5_ib_stage_ib_reg_init, - mlx5_ib_stage_ib_reg_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, - mlx5_ib_stage_post_ib_reg_umr_init, - NULL), -}; +static int +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5_ib_dev *ibdev; + int vport_index; + + ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch); + vport_index = ibdev->free_port++; + + ibdev->port[vport_index].rep = rep; + write_lock(&ibdev->port[vport_index].roce.netdev_lock); + ibdev->port[vport_index].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); + write_unlock(&ibdev->port[vport_index].roce.netdev_lock); + + return 0; +} static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + int num_ports = MLX5_TOTAL_VPORTS(dev); const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + int vport_index; if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else - profile = &vf_rep_profile; + return mlx5_ib_set_vport_rep(dev, rep); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; - ibdev->rep = rep; - ibdev->mdev = dev; - ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), - MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, profile)) { + ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), + GFP_KERNEL); + if (!ibdev->port) { ib_dealloc_device(&ibdev->ib_dev); - return -EINVAL; + 
return -ENOMEM; } + ibdev->is_rep = true; + vport_index = ibdev->free_port++; + ibdev->port[vport_index].rep = rep; + ibdev->port[vport_index].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); + ibdev->mdev = dev; + ibdev->num_ports = num_ports; + + if (!__mlx5_ib_add(ibdev, profile)) + return -EINVAL; + rep->rep_if[REP_IB].priv = ibdev; return 0; @@ -80,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv) + if (!rep->rep_if[REP_IB].priv || + rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); rep->rep_if[REP_IB].priv = NULL; - ib_dealloc_device(&dev->ib_dev); } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) @@ -140,22 +130,21 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) return mlx5_eswitch_vport_rep(esw, vport); } -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - struct mlx5_flow_handle *flow_rule; struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep; - if (!dev->rep) - return 0; + if (!dev->is_rep || !port) + return NULL; - flow_rule = - mlx5_eswitch_add_send_to_vport_rule(esw, - dev->rep->vport, - sq->base.mqp.qpn); - if (IS_ERR(flow_rule)) - return PTR_ERR(flow_rule); - sq->flow_rule = flow_rule; + if (!dev->port[port - 1].rep) + return ERR_PTR(-EINVAL); - return 0; + rep = dev->port[port - 1].rep; + + return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport, + sq->base.mqp.qpn); } diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 798d41e61fb4..1d9778da8a50 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -20,8 +20,9 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq); +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, int vport_index); #else /* CONFIG_MLX5_ESWITCH */ @@ -52,10 +53,12 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} -static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static inline +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - return 0; + return NULL; } static inline diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1aaa2056d188..abac70ad5c7c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -156,6 +156,34 @@ static int get_port_state(struct ib_device *ibdev, return ret; } +static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, + struct net_device *ndev, + u8 *port_num) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct net_device *rep_ndev; + struct mlx5_ib_port *port; + int i; + + for (i = 0; i < 
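Side note, not part of the patch: create_flow_rule_vport_sq() above now returns a pointer that can be NULL ("nothing to create" for non-representor devices), an ERR_PTR()-encoded errno, or a valid flow handle, and callers must distinguish the three cases. The sketch below imitates the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers (the real ones live in include/linux/err.h) in userspace; struct rule and create_rule() are made-up names.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err)        { return (void *)(intptr_t)err; }
static long  PTR_ERR(const void *ptr) { return (long)(intptr_t)ptr; }
static int   IS_ERR(const void *ptr)
{
        return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct rule { int id; };

static struct rule *create_rule(int is_rep, int port_has_rep)
{
        if (!is_rep)
                return NULL;                /* nothing to do, not an error */
        if (!port_has_rep)
                return ERR_PTR(-22);        /* -EINVAL */

        struct rule *r = malloc(sizeof(*r));
        return r ? r : ERR_PTR(-12);        /* -ENOMEM */
}

int main(void)
{
        struct rule *r = create_rule(1, 1);

        if (IS_ERR(r))
                printf("failed: %ld\n", PTR_ERR(r));
        else if (!r)
                printf("nothing to create\n");
        else
                printf("got a rule handle\n");

        if (!IS_ERR(r))
                free(r);                    /* free(NULL) is a no-op */
        return 0;
}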
dev->num_ports; i++) { + port = &dev->port[i]; + if (!port->rep) + continue; + + read_lock(&port->roce.netdev_lock); + rep_ndev = mlx5_ib_get_rep_netdev(esw, + port->rep->vport); + if (rep_ndev == ndev) { + read_unlock(&port->roce.netdev_lock); + *port_num = i + 1; + return &port->roce; + } + read_unlock(&port->roce.netdev_lock); + } + + return NULL; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -172,22 +200,17 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: + /* Should already be registered during the load */ + if (ibdev->is_rep) + break; write_lock(&roce->netdev_lock); - if (ibdev->rep) { - struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; - struct net_device *rep_ndev; - - rep_ndev = mlx5_ib_get_rep_netdev(esw, - ibdev->rep->vport); - if (rep_ndev == ndev) - roce->netdev = ndev; - } else if (ndev->dev.parent == mdev->device) { + if (ndev->dev.parent == mdev->device) roce->netdev = ndev; - } write_unlock(&roce->netdev_lock); break; case NETDEV_UNREGISTER: + /* In case of reps, ib device goes away before the netdevs */ write_lock(&roce->netdev_lock); if (roce->netdev == ndev) roce->netdev = NULL; @@ -205,6 +228,10 @@ static int mlx5_netdev_event(struct notifier_block *this, dev_put(lag_ndev); } + if (ibdev->is_rep) + roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + if (!roce) + return NOTIFY_DONE; if ((upper == ndev || (!upper && ndev == roce->netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; @@ -257,11 +284,11 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, /* Ensure ndev does not disappear before we invoke dev_hold() */ - read_lock(&ibdev->roce[port_num - 1].netdev_lock); - ndev = ibdev->roce[port_num - 1].netdev; + read_lock(&ibdev->port[port_num - 1].roce.netdev_lock); + ndev = ibdev->port[port_num - 1].roce.netdev; if (ndev) dev_hold(ndev); - read_unlock(&ibdev->roce[port_num - 1].netdev_lock); + read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock); out: mlx5_ib_put_native_port_mdev(ibdev, port_num); @@ -479,9 +506,14 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. + * Use native port in case of reps */ - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, - mdev_port_num); + if (dev->is_rep) + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + 1); + else + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); @@ -542,52 +574,22 @@ out: return err; } -struct mlx5_ib_vlan_info { - u16 vlan_id; - bool vlan; -}; - -static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) -{ - struct mlx5_ib_vlan_info *vlan_info = data; - - if (is_vlan_dev(lower_dev)) { - vlan_info->vlan = true; - vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); - } - /* We are interested only in first level vlan device, so - * always return 1 to stop iterating over next level devices. 
- */ - return 1; -} - static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; - struct mlx5_ib_vlan_info vlan_info = { }; + u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; u8 mac[ETH_ALEN]; + int ret; if (gid) { gid_type = attr->gid_type; - ether_addr_copy(mac, attr->ndev->dev_addr); - - if (is_vlan_dev(attr->ndev)) { - vlan_info.vlan = true; - vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); - } else { - /* If the netdev is upper device and if it's lower - * lower device is vlan device, consider vlan id of - * the lower vlan device for this gid entry. - */ - rcu_read_lock(); - netdev_walk_all_lower_dev_rcu(attr->ndev, - get_lower_dev_vlan, &vlan_info); - rcu_read_unlock(); - } + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; } switch (gid_type) { @@ -608,7 +610,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, roce_l3_type, gid->raw, mac, - vlan_info.vlan, vlan_info.vlan_id, + vlan_id < VLAN_CFI_MASK, vlan_id, port_num); } @@ -1407,7 +1409,9 @@ static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, { int ret; - /* Only link layer == ethernet is valid for representors */ + /* Only link layer == ethernet is valid for representors + * and we always use port 1 + */ ret = mlx5_query_port_roce(ibdev, port, props); if (ret || !props) return ret; @@ -1954,11 +1958,11 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, print_lib_caps(dev, context->lib_caps); if (dev->lag_active) { - u8 port = mlx5_core_native_port_num(dev->mdev); + u8 port = mlx5_core_native_port_num(dev->mdev) - 1; atomic_set(&context->tx_port_affinity, atomic_add_return( - 1, &dev->roce[port].tx_port_affinity)); + 1, &dev->port[port].roce.tx_port_affinity)); } return 0; @@ -2060,21 +2064,22 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + if ((vma->vm_end - vma->vm_start != PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED)) return -EINVAL; if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) return -EOPNOTSUPP; - if (vma->vm_flags & VM_WRITE) + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) return -EPERM; vma->vm_flags &= ~VM_MAYWRITE; - if (!dev->mdev->clock_info_page) + if (!dev->mdev->clock_info) return -EOPNOTSUPP; - return rdma_user_mmap_page(&context->ibucontext, vma, - dev->mdev->clock_info_page, PAGE_SIZE); + return vm_insert_page(vma, vma->vm_start, + virt_to_page(dev->mdev->clock_info)); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, @@ -2259,89 +2264,200 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs) +static inline int check_dm_type_support(struct mlx5_ib_dev *dev, + u32 type) { - u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - struct mlx5_memic *memic = &to_mdev(ibdev)->memic; - phys_addr_t memic_addr; - struct mlx5_ib_dm *dm; + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (!MLX5_CAP_DEV_MEM(dev->mdev, memic)) + return -EOPNOTSUPP; + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + if (!capable(CAP_SYS_RAWIO) || + !capable(CAP_NET_RAW)) + return -EPERM; + + 
if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + return -EOPNOTSUPP; + break; + } + + return 0; +} + +static int handle_alloc_dm_memic(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; u64 start_offset; u32 page_idx; int err; - dm = kzalloc(sizeof(*dm), GFP_KERNEL); - if (!dm) - return ERR_PTR(-ENOMEM); - - mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", - attr->length, act_size, attr->alignment); + dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - err = mlx5_cmd_alloc_memic(memic, &memic_addr, - act_size, attr->alignment); + err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr, + dm->size, attr->alignment); if (err) - goto err_free; + return err; - start_offset = memic_addr & ~PAGE_MASK; - page_idx = (memic_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >> PAGE_SHIFT; err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); + if (err) + goto err_dealloc; + + start_offset = dm->dev_addr & ~PAGE_MASK; + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, &start_offset, sizeof(start_offset)); if (err) goto err_dealloc; + bitmap_set(to_mucontext(ctx)->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + + return 0; + +err_dealloc: + mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + + return err; +} + +static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs, + int type) +{ + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; + u64 act_size; + int err; + + /* Allocation size must a multiple of the basic block size + * and a power of 2. 
+ */ + act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = roundup_pow_of_two(act_size); + + dm->size = act_size; + err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size, + to_mucontext(ctx)->devx_uid, &dm->dev_addr, + &dm->icm_dm.obj_id); + if (err) + return err; + err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - &page_idx, sizeof(page_idx)); + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &dm->dev_addr, sizeof(dm->dev_addr)); if (err) - goto err_dealloc; + mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size, + to_mucontext(ctx)->devx_uid, + dm->dev_addr, dm->icm_dm.obj_id); + + return err; +} + +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dm *dm; + enum mlx5_ib_uapi_dm_type type; + int err; - bitmap_set(to_mucontext(context)->dm_pages, page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + err = uverbs_get_const_default(&type, attrs, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + MLX5_IB_UAPI_DM_TYPE_MEMIC); + if (err) + return ERR_PTR(err); - dm->dev_addr = memic_addr; + mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n", + type, attr->length, attr->alignment); + + err = check_dm_type_support(to_mdev(ibdev), type); + if (err) + return ERR_PTR(err); + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + dm->type = type; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + err = handle_alloc_dm_memic(context, dm, + attr, + attrs); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type); + break; + default: + err = -EOPNOTSUPP; + } + + if (err) + goto err_free; return &dm->ibdm; -err_dealloc: - mlx5_cmd_dealloc_memic(memic, memic_addr, - act_size); err_free: kfree(dm); return ERR_PTR(err); } -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { - struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; + struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); - u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); u32 page_idx; int ret; - ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); - if (ret) - return ret; + switch (dm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + if (ret) + return ret; - page_idx = (dm->dev_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> - PAGE_SHIFT; - bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, - page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + page_idx = (dm->dev_addr - + pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, + memic_bar_start_addr)) >> + PAGE_SHIFT; + bitmap_clear(ctx->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size, + ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); + if (ret) + return ret; + break; + default: + return -EOPNOTSUPP; + } kfree(dm); return 0; } -static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata 
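A small arithmetic illustration, not part of the patch: handle_alloc_dm_sw_icm() above rounds the requested length up to a block multiple and then to a power of two before calling the allocator, while the MEMIC paths compute a page index as (device address - BAR start - MEMIC offset) >> PAGE_SHIFT. All numbers below are hypothetical; the sketch only demonstrates the arithmetic.

#include <stdint.h>
#include <stdio.h>

static uint64_t roundup_to(uint64_t len, uint64_t mult)
{
        return ((len + mult - 1) / mult) * mult;
}

static uint64_t roundup_pow_of_two(uint64_t v)
{
        uint64_t p = 1;

        while (p < v)
                p <<= 1;
        return p;
}

int main(void)
{
        uint64_t block = 1ULL << 12;            /* hypothetical 4 KiB ICM block */
        uint64_t req = 9000;                    /* user-requested length */
        uint64_t act = roundup_pow_of_two(roundup_to(req, block));

        printf("sw_icm size: %llu\n", (unsigned long long)act);  /* 16384 */

        /* MEMIC: page index of an allocation inside the device memory BAR */
        uint64_t bar_start = 0x10000000;        /* hypothetical BAR address */
        uint64_t memic_off = 0x200000;          /* hypothetical capability offset */
        uint64_t dev_addr = bar_start + memic_off + 5 * 4096 + 100;
        uint64_t page_idx = (dev_addr - bar_start - memic_off) >> 12;

        printf("page_idx: %llu\n", (unsigned long long)page_idx);  /* 5 */
        return 0;
}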
*udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx5_ib_pd *pd = to_mpd(ibpd); struct ib_device *ibdev = ibpd->device; @@ -2350,8 +2466,10 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); - uid = context ? to_mucontext(context)->devx_uid : 0; + uid = context ? context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), @@ -2361,7 +2479,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; - if (context) { + if (udata) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); @@ -2372,7 +2490,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); @@ -3151,10 +3269,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (ft_type == MLX5_IB_FT_RX) { fn_type = MLX5_FLOW_NAMESPACE_BYPASS; prio = &dev->flow_db->prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; @@ -3164,7 +3282,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } @@ -3197,12 +3315,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-ENOTSUPP); - if (num_entries > max_table_size) - return ERR_PTR(-ENOMEM); + max_table_size = min_t(int, num_entries, max_table_size); ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, num_entries, num_groups, + return _get_prio(ns, prio, priority, max_table_size, num_groups, flags); return prio; @@ -3370,7 +3487,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); - if (dev->rep && is_egress) + if (dev->is_rep && is_egress) return ERR_PTR(-EINVAL); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -3401,13 +3518,17 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->rep) { + if (dev->is_rep) { void *misc; + if (!dev->port[flow_attr->port - 1].rep) { + err = -EINVAL; + goto free; + } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, - dev->rep->vport); + dev->port[flow_attr->port - 1].rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ 
-3769,11 +3890,16 @@ _get_flow_table(struct mlx5_ib_dev *dev, bool mcast) { struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - int max_table_size; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; u32 flags = 0; int priority; + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3782,20 +3908,18 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + priority = FDB_BYPASS_PATH; } - if (max_table_size < MLX5_FS_MAX_ENTRIES) - return ERR_PTR(-ENOMEM); - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); if (!ns) @@ -3803,13 +3927,18 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) prio = &dev->flow_db->prios[priority]; - else + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + + if (!prio) + return ERR_PTR(-EINVAL); if (prio->flow_table) return prio; - return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + return _get_prio(ns, prio, priority, max_table_size, MLX5_FS_MAX_TYPES, flags); } @@ -4509,7 +4638,7 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= dev->num_ports; port++) { + for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { dev->mdev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { @@ -4540,7 +4669,7 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; @@ -4555,10 +4684,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = set_has_smi_cap(dev); - if (err) - goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); @@ -4587,6 +4712,16 @@ out: return err; } +static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + /* For representors use port 1, is this is the only native + * port + */ + if (dev->is_rep) + return __get_port_caps(dev, 1); + return __get_port_caps(dev, port); +} + static void destroy_umrc_res(struct mlx5_ib_dev *dev) { int err; @@ -4596,7 +4731,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup 
failed\n"); if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp); + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); if (dev->umrc.cq) ib_free_cq(dev->umrc.cq); if (dev->umrc.pd) @@ -4701,7 +4836,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return 0; error_4: - mlx5_ib_destroy_qp(qp); + mlx5_ib_destroy_qp(qp, NULL); dev->umrc.qp = NULL; error_3: @@ -4752,11 +4887,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + ret = mlx5_ib_alloc_pd(devr->p0, NULL); if (ret) goto error0; - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -4768,7 +4903,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->c0->cq_context = NULL; atomic_set(&devr->c0->usecnt, 0); - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); goto error2; @@ -4779,7 +4914,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) mutex_init(&devr->x0->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x1)) { ret = PTR_ERR(devr->x1); goto error3; @@ -4797,19 +4932,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; - devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s0)) { - ret = PTR_ERR(devr->s0); + devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s0) { + ret = -ENOMEM; goto error4; } + devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; - devr->s0->uobject = NULL; - devr->s0->event_handler = NULL; - devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; + ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); + if (ret) + goto err_create; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); @@ -4819,18 +4956,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s1)) { - ret = PTR_ERR(devr->s1); + devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s1) { + ret = -ENOMEM; goto error5; } + devr->s1->device = &dev->ib_dev; devr->s1->pd = devr->p0; - devr->s1->uobject = NULL; - devr->s1->event_handler = NULL; - devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; devr->s1->ext.cq = devr->c0; + + ret = mlx5_ib_create_srq(devr->s1, &attr, NULL); + if (ret) + goto error6; + atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); @@ -4842,16 +4982,20 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) return 0; +error6: + kfree(devr->s1); error5: - mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_destroy_srq(devr->s0, NULL); +err_create: + kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); error3: - mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: - mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_destroy_cq(devr->c0, NULL); error1: - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_dealloc_pd(devr->p0, 
NULL); error0: kfree(devr->p0); return ret; @@ -4859,20 +5003,20 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { - struct mlx5_ib_dev *dev = - container_of(devr, struct mlx5_ib_dev, devr); int port; - mlx5_ib_destroy_srq(devr->s1); - mlx5_ib_destroy_srq(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0); - mlx5_ib_dealloc_xrcd(devr->x1); - mlx5_ib_destroy_cq(devr->c0); - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_destroy_srq(devr->s1, NULL); + kfree(devr->s1); + mlx5_ib_destroy_srq(devr->s0, NULL); + kfree(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_ib_destroy_cq(devr->c0, NULL); + mlx5_ib_dealloc_pd(devr->p0, NULL); kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ - for (port = 0; port < dev->num_ports; ++port) + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); } @@ -5015,10 +5159,10 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { int err; - dev->roce[port_num].nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce[port_num].nb); + dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { - dev->roce[port_num].nb.notifier_call = NULL; + dev->port[port_num].roce.nb.notifier_call = NULL; return err; } @@ -5027,9 +5171,9 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { - if (dev->roce[port_num].nb.notifier_call) { - unregister_netdevice_notifier(&dev->roce[port_num].nb); - dev->roce[port_num].nb.notifier_call = NULL; + if (dev->port[port_num].roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); + dev->port[port_num].roce.nb.notifier_call = NULL; } } @@ -5578,7 +5722,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", port_num + 1); - ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; + ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN; } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5738,7 +5882,10 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, UVERBS_ATTR_TYPE(u16), - UA_MANDATORY)); + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + enum mlx5_ib_uapi_dm_type, + UA_OPTIONAL)); ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_flow_action, @@ -5829,35 +5976,58 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, return &mcounters->ibcntrs; } -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_core_dev *mdev = dev->mdev; + mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); } - kfree(dev->port); + + WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); + + WARN_ON(dev->dm.steering_sw_icm_alloc_blocks && + !bitmap_empty( + dev->dm.steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.steering_sw_icm_alloc_blocks); + + WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks && + !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks, + 
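Aside, not part of the patch: the reworked create_dev_resources() above keeps the classic goto-unwind style — each acquisition gets a matching label (error6, error5, err_create, ...), and a failure jumps to the label that releases exactly what was acquired so far, in reverse order. A minimal userspace sketch of the idiom, with invented resource names:

#include <stdio.h>
#include <stdlib.h>

struct res { char name[8]; };

static struct res *get_res(const char *name, int fail)
{
        struct res *r;

        if (fail)
                return NULL;
        r = calloc(1, sizeof(*r));
        if (r)
                snprintf(r->name, sizeof(r->name), "%s", name);
        return r;
}

static int create_all(void)
{
        struct res *pd, *cq, *srq;

        pd = get_res("pd", 0);
        if (!pd)
                return -1;

        cq = get_res("cq", 0);
        if (!cq)
                goto err_pd;

        srq = get_res("srq", 1);        /* simulate a failure at the last step */
        if (!srq)
                goto err_cq;

        /* Success: a real driver keeps these; the demo just releases them. */
        free(srq);
        free(cq);
        free(pd);
        return 0;

err_cq:
        free(cq);
err_pd:
        free(pd);
        return -1;
}

int main(void)
{
        printf("create_all() = %d\n", create_all());
        return 0;
}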
BIT(MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); } -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; int err; int i; - dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), - GFP_KERNEL); - if (!dev->port) - return -ENOMEM; - for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); - rwlock_init(&dev->roce[i].netdev_lock); + rwlock_init(&dev->port[i].roce.netdev_lock); + dev->port[i].roce.dev = dev; + dev->port[i].roce.native_port_num = i + 1; + dev->port[i].roce.last_port_state = IB_PORT_DOWN; } err = mlx5_ib_init_multiport_master(dev); if (err) - goto err_free_port; + return err; + + err = set_has_smi_cap(dev); + if (err) + return err; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { @@ -5885,22 +6055,54 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); - spin_lock_init(&dev->memic.memic_lock); - dev->memic.dev = mdev; + if (MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) { + if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(mdev, + log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.steering_sw_icm_alloc_blocks) + goto err_mp; + } + + if (MLX5_CAP64_DEV_MEM(mdev, + header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = BIT( + MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.header_modify_sw_icm_alloc_blocks) + goto err_dm; + } + } + + spin_lock_init(&dev->dm.lock); + dev->dm.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = init_srcu_struct(&dev->mr_srcu); if (err) - goto err_mp; + goto err_dm; } return 0; + +err_dm: + kfree(dev->dm.steering_sw_icm_alloc_blocks); + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); + err_mp: mlx5_ib_cleanup_multiport_master(dev); -err_free_port: - kfree(dev->port); - return -ENOMEM; } @@ -5916,20 +6118,6 @@ static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) return 0; } -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_ib_dev *nic_dev; - - nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch); - - if (!nic_dev) - return -EINVAL; - - dev->flow_db = nic_dev->flow_db; - - return 0; -} - static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) { kfree(dev->flow_db); @@ -5989,7 +6177,10 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; @@ -6025,7 +6216,7 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) 
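One more illustration, not part of the patch: the init stage above sizes each SW ICM bitmap as BIT(log_area_size - log_block_size) blocks and backs it with BITS_TO_LONGS(blocks) unsigned longs. The sketch redoes that arithmetic in userspace with hypothetical capability values (2 MiB area, 4 KiB blocks).

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG    (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
        unsigned int log_area = 21;     /* hypothetical: 2 MiB of SW ICM  */
        unsigned int log_block = 12;    /* hypothetical: 4 KiB block size */
        unsigned long long blocks = 1ULL << (log_area - log_block);   /* 512 */
        size_t words = BITS_TO_LONGS(blocks);

        printf("%llu blocks -> %zu unsigned longs (%zu bytes)\n",
               blocks, words, words * sizeof(unsigned long));
        return 0;
}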
+static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; int err; @@ -6091,7 +6282,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) + if (MLX5_CAP_DEV_MEM(mdev, memic) || + MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); if (mlx5_accel_ipsec_device_caps(dev->mdev) & @@ -6131,7 +6324,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .query_port = mlx5_ib_rep_query_port, }; -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; @@ -6149,13 +6342,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; - int i; - - for (i = 0; i < dev->num_ports; i++) { - dev->roce[i].dev = dev; - dev->roce[i].native_port_num = i + 1; - dev->roce[i].last_port_state = IB_PORT_DOWN; - } dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | @@ -6167,6 +6353,7 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) port_num = mlx5_core_native_port_num(dev->mdev) - 1; + /* Register only for native ports */ return mlx5_add_netdev_notifier(dev, port_num); } @@ -6177,7 +6364,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) mlx5_remove_netdev_notifier(dev, port_num); } -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; @@ -6193,7 +6380,7 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) return err; } -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_stage_common_roce_cleanup(dev); } @@ -6240,12 +6427,12 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) } } -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) { return create_dev_resources(&dev->devr); } -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) { destroy_dev_resources(&dev->devr); } @@ -6267,7 +6454,7 @@ static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { .get_hw_stats = mlx5_ib_get_hw_stats, }; -int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); @@ -6278,7 +6465,7 @@ int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) return 0; } -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -6308,7 +6495,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); } -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) { int err; @@ -6323,13 +6510,13 @@ int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) return err; } -void 
mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) { mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; @@ -6341,17 +6528,17 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, name); } -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); } -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) { ib_unregister_device(&dev->ib_dev); } -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { return create_umr_res(dev); } @@ -6406,6 +6593,9 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, if (profile->stage[stage].cleanup) profile->stage[stage].cleanup(dev); } + + kfree(dev->port); + ib_dealloc_device(&dev->ib_dev); } void *__mlx5_ib_add(struct mlx5_ib_dev *dev, @@ -6527,6 +6717,9 @@ const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6581,12 +6774,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) enum rdma_link_layer ll; struct mlx5_ib_dev *dev; int port_type_cap; + int num_ports; printk_once(KERN_INFO "%s", mlx5_version); if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - mlx5_ib_register_vport_reps(mdev); + if (!mlx5_core_mp_enabled(mdev)) + mlx5_ib_register_vport_reps(mdev); return mdev; } @@ -6596,13 +6791,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); + num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; + dev->port = kcalloc(num_ports, sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) { + ib_dealloc_device((struct ib_device *)dev); + return NULL; + } dev->mdev = mdev; - dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), - MLX5_CAP_GEN(mdev, num_vhca_ports)); + dev->num_ports = num_ports; return __mlx5_ib_add(dev, &pf_profile); } @@ -6629,8 +6831,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) dev = context; __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); - - ib_dealloc_device((struct ib_device *)dev); } static struct mlx5_interface mlx5_ib_interface = { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a617d78eae1..40eb8be482e4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -48,6 +48,7 @@ #include <rdma/mlx5-abi.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include "srq.h" @@ -117,6 +118,10 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; +#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ + (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) +#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 
<< MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -194,6 +199,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; + struct mlx5_ib_flow_prio fdb; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -553,15 +559,28 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_dm { struct ib_dm ibdm; phys_addr_t dev_addr; + u32 type; + size_t size; + union { + struct { + u32 obj_id; + } icm_dm; + /* other dm types specific params should be added here */ + }; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) -#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ - IB_ACCESS_REMOTE_WRITE |\ - IB_ACCESS_REMOTE_READ |\ - IB_ACCESS_REMOTE_ATOMIC |\ - IB_ZERO_BASED) +#define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ACCESS_REMOTE_ATOMIC |\ + IB_ZERO_BASED) + +#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ZERO_BASED) struct mlx5_ib_mr { struct ib_mr ibmr; @@ -702,12 +721,6 @@ struct mlx5_ib_multiport { spinlock_t mpi_lock; }; -struct mlx5_ib_port { - struct mlx5_ib_counters cnts; - struct mlx5_ib_multiport mp; - struct mlx5_ib_dbg_cc_params *dbg_cc_params; -}; - struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -721,6 +734,14 @@ struct mlx5_roce { u8 native_port_num; }; +struct mlx5_ib_port { + struct mlx5_ib_counters cnts; + struct mlx5_ib_multiport mp; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + struct mlx5_roce roce; + struct mlx5_eswitch_rep *rep; +}; + struct mlx5_ib_dbg_param { int offset; struct mlx5_ib_dev *dev; @@ -840,10 +861,16 @@ struct mlx5_ib_flow_action { }; }; -struct mlx5_memic { +struct mlx5_dm { struct mlx5_core_dev *dev; - spinlock_t memic_lock; + /* This lock is used to protect the access to the shared + * allocation map when concurrent requests by different + * processes are handled. 
+ */ + spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; }; struct mlx5_read_counters_attr { @@ -905,7 +932,6 @@ struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; struct notifier_block mdev_events; - struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask */ @@ -940,17 +966,18 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - struct mlx5_eswitch_rep *rep; + bool is_rep; int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; - struct mlx5_memic memic; + struct mlx5_dm dm; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; + int free_port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -968,6 +995,14 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + return to_mdev(context->ibucontext.device); +} + static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); @@ -1046,17 +1081,16 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -int mlx5_ib_destroy_srq(struct ib_srq *srq); +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1068,7 +1102,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -1083,9 +1117,8 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int 
wqe_index, void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1112,10 +1145,9 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); -int mlx5_ib_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -1124,9 +1156,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1170,7 +1201,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -1182,7 +1213,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs); struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -1230,23 +1261,6 @@ static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev); -int 
mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ca921fd40499..5f09699fab98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -600,7 +600,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { - if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; debugfs_remove_recursive(dev->cache.root); @@ -614,7 +614,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) struct dentry *dir; int i; - if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); @@ -677,7 +677,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->rep && + !dev->is_rep && mlx5_core_is_pf(dev->mdev)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else @@ -1159,8 +1159,8 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } -static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, - u64 length, int acc) +static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, + u64 length, int acc, int mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); @@ -1182,9 +1182,8 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -1194,7 +1193,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, MLX5_SET64(mkc, mkc, len, length); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, memic_addr - dev->mdev->bar_addr); + MLX5_SET64(mkc, mkc, start_addr, start_addr); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1236,15 +1235,31 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_dm *mdm = to_mdm(dm); - u64 memic_addr; + struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; + u64 start_addr = mdm->dev_addr + attr->offset; + int mode; - if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) - return ERR_PTR(-EINVAL); + switch (mdm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (attr->access_flags & 
~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); + + mode = MLX5_MKC_ACCESS_MODE_MEMIC; + start_addr -= pci_resource_start(dev->pdev, 0); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); - memic_addr = mdm->dev_addr + attr->offset; + mode = MLX5_MKC_ACCESS_MODE_SW_ICM; + break; + default: + return ERR_PTR(-EINVAL); + } - return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, - attr->access_flags); + return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, + attr->access_flags, mode); } struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1622,15 +1637,14 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) kfree(mr); } -int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0aa10ebda5d9..91507a2e9290 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem->npages && mr->parent && + if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&mr->parent->num_leaf_free); @@ -711,6 +711,15 @@ struct pf_frame { int depth; }; +static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +{ + if (!mmkey) + return false; + if (mmkey->type == MLX5_MKEY_MW) + return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); + return mmkey->key == key; +} + static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) { struct mlx5_ib_mw *mw; @@ -760,7 +769,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, next_mr: mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); - if (!mmkey || mmkey->key != key) { + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; @@ -920,7 +929,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, void *wqe, void *wqe_end, u32 *bytes_mapped, - u32 *total_wqe_bytes, int receive_queue) + u32 *total_wqe_bytes, bool receive_queue) { int ret = 0, npages = 0; u64 io_virt; @@ -1200,17 +1209,15 @@ static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - int ret; - void *wqe, *wqe_end; + bool sq = pfault->type & MLX5_PFAULT_REQUESTOR; + u16 wqe_index = pfault->wqe.wqe_index; + void *wqe = NULL, *wqe_end = NULL; u32 bytes_mapped, total_wqe_bytes; - char *buffer = NULL; + struct mlx5_core_rsc_common *res; int resume_with_error = 1; - u16 wqe_index = pfault->wqe.wqe_index; - int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res = NULL; - struct mlx5_ib_qp *qp = NULL; - struct mlx5_ib_srq *srq = NULL; + struct mlx5_ib_qp *qp; size_t bytes_copied; + int ret = 0; 
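/*
 * Editorial note, not part of the patch: mkey_is_eq(), introduced in the odp.c
 * hunk above, compares memory-window mkeys by their base value only. A hedged
 * illustration of that comparison -- assuming, as elsewhere in mlx5, that the
 * low 8 bits of an mkey carry a per-bind tag that can differ between the
 * cached mmkey and the key found in the faulting WQE. example_mkey_match() is
 * an illustrative name; only the masking mirrors mlx5_base_mkey().
 */
#include <linux/types.h>

static inline bool example_mkey_match(u32 stored, u32 wanted, bool is_mw)
{
        /* illustrative equivalent of mlx5_base_mkey(): drop the low byte */
        if (is_mw)
                return (stored & 0xffffff00) == (wanted & 0xffffff00);
        return stored == wanted;
}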
res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1218,87 +1225,74 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, return; } - switch (res->res) { - case MLX5_RES_QP: - qp = res_to_qp(res); - break; - case MLX5_RES_SRQ: - case MLX5_RES_XSRQ: - srq = res_to_srq(res); - break; - default: - mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + if (res->res != MLX5_RES_QP && res->res != MLX5_RES_SRQ && + res->res != MLX5_RES_XSRQ) { + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", + pfault->type); goto resolve_page_fault; } - buffer = (char *)__get_free_page(GFP_KERNEL); - if (!buffer) { + wqe = (void *)__get_free_page(GFP_KERNEL); + if (!wqe) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - if (qp) { - if (requestor) { - ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } else { - ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } - } else { - ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, - buffer, PAGE_SIZE, + qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL; + if (qp && sq) { + ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_initiator_pfault_handler( + dev, pfault, qp, &wqe, &wqe_end, bytes_copied); + } else if (qp && !sq) { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_rq( + dev, qp, wqe, &wqe_end, bytes_copied); + } else if (!qp) { + struct mlx5_ib_srq *srq = res_to_srq(res); + + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE, &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_srq( + dev, srq, &wqe, &wqe_end, bytes_copied); } - if (ret) { - mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", - ret, wqe_index, pfault->token); + if (ret < 0 || wqe >= wqe_end) goto resolve_page_fault; - } - wqe = buffer; - if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, - &wqe, &wqe_end, - bytes_copied); - else if (qp) - ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, - wqe, &wqe_end, - bytes_copied); - else - ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, - &wqe, &wqe_end, - bytes_copied); + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, + &total_wqe_bytes, !sq); + if (ret == -EAGAIN) + goto out; - if (ret < 0) + if (ret < 0 || total_wqe_bytes > bytes_mapped) goto resolve_page_fault; - if (wqe >= wqe_end) { - mlx5_ib_err(dev, "ODP fault on invalid WQE.\n"); - goto resolve_page_fault; - } +out: + ret = 0; + resume_with_error = 0; - ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, - &bytes_mapped, &total_wqe_bytes, - !requestor); - if (ret == -EAGAIN) { - resume_with_error = 0; - goto resolve_page_fault; - } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { - goto resolve_page_fault; - } +read_user: + if (ret) + mlx5_ib_err( + dev, + "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %x\n", + ret, wqe_index, pfault->token); - resume_with_error = 0; resolve_page_fault: mlx5_ib_page_fault_resume(dev, pfault, resume_with_error); mlx5_ib_dbg(dev, "PAGE FAULT completed. 
QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); mlx5_core_res_put(res); - free_page((unsigned long)buffer); + free_page((unsigned long)wqe); } static int pages_in_range(u64 address, u32 length) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 581144e224e2..f6623c77443a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -92,6 +92,7 @@ struct mlx5_modify_raw_qp_param { struct mlx5_rate_limit rl; u8 rq_q_ctr_id; + u16 port; }; static void get_cqs(enum ib_qp_type qp_type, @@ -777,14 +778,17 @@ err_umem: } static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_rwq *rwq) + struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) atomic_dec(&dev->delay_drop.rqs_cnt); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) ib_umem_release(rwq->umem); @@ -983,11 +987,15 @@ err_bfreg: } static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base) + struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base, + struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); if (base->ubuffer.umem) ib_umem_release(base->ubuffer.umem); @@ -1206,11 +1214,11 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); } -static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) { if (sq->flow_rule) mlx5_del_flow_rules(sq->flow_rule); + sq->flow_rule = NULL; } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, @@ -1278,15 +1286,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (err) goto err_umem; - err = create_flow_rule_vport_sq(dev, sq); - if (err) - goto err_flow; - return 0; -err_flow: - mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); - err_umem: ib_umem_release(sq->ubuffer.umem); sq->ubuffer.umem = NULL; @@ -1297,7 +1298,7 @@ err_umem: static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { - destroy_flow_rule_vport_sq(dev, sq); + destroy_flow_rule_vport_sq(sq); mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); ib_umem_release(sq->ubuffer.umem); } @@ -1402,7 +1403,8 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 tdn, u32 *qp_flags_en, - struct ib_pd *pd) + struct ib_pd *pd, + u32 *out, int outlen) { u8 lb_flag = 0; u32 *in; @@ -1429,15 +1431,16 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - if (dev->rep) { + if (dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } MLX5_SET(tirc, tirc, self_lb_block, lb_flag); - err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, 
outlen); + rq->tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1463,6 +1466,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; u32 tdn = mucontext->tdn; u16 uid = to_mpd(pd)->uid; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); @@ -1495,7 +1499,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd); + err = create_raw_packet_qp_tir( + dev, rq, tdn, &qp->flags_en, pd, out, + MLX5_ST_SZ_BYTES(create_tir_out)); if (err) goto err_destroy_rq; @@ -1504,6 +1510,20 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp->tir_icm_addr = MLX5_GET( + create_tir_out, out, icm_address_31_0); + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp->comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } } @@ -1577,8 +1597,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_create_qp_resp resp = {}; int inlen; + int outlen; int err; u32 *in; + u32 *out; void *tirc; void *hfso; u32 selected_fields = 0; @@ -1641,7 +1663,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) { + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } @@ -1658,10 +1680,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); + outlen = MLX5_ST_SZ_BYTES(create_tir_out); + in = kvzalloc(inlen + outlen, GFP_KERNEL); if (!in) return -ENOMEM; + out = in + MLX5_ST_SZ_DW(create_tir_in); MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, @@ -1773,8 +1797,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: - err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen); + qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1789,6 +1814,18 @@ create_tir: if (mucontext->devx_uid) { resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; resp.tirn = qp->rss_qp.tirn; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp.tir_icm_addr = + MLX5_GET(create_tir_out, out, icm_address_31_0); + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp.comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } err = ib_copy_to_udata(udata, 
&resp, min(udata->outlen, sizeof(resp))); @@ -2287,7 +2324,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, pd, qp, base); + destroy_qp_user(dev, pd, qp, base, udata); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); @@ -2398,7 +2435,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 lag_tx_affinity); -static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_udata *udata) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base; @@ -2469,7 +2507,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base); + destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } static const char *ib_qp_type_str(enum ib_qp_type type) @@ -2735,7 +2773,7 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) return 0; } -int mlx5_ib_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -2746,7 +2784,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) if (mqp->qp_sub_type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); - destroy_qp_common(dev, mqp); + destroy_qp_common(dev, mqp, udata); kfree(mqp); @@ -2964,6 +3002,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | @@ -2997,6 +3040,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, }, }, [MLX5_QP_STATE_RTS] = { @@ -3013,6 +3062,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, }, }, [MLX5_QP_STATE_SQER] = { @@ -3024,6 +3079,10 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; @@ -3264,6 +3323,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (modify_sq) { + struct mlx5_flow_handle *flow_rule; + if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity, @@ -3272,8 +3333,25 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return 
modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, - raw_qp_param, qp->ibqp.pd); + flow_rule = create_flow_rule_vport_sq(dev, sq, + raw_qp_param->port); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, + raw_qp_param, qp->ibqp.pd); + if (err) { + if (flow_rule) + mlx5_del_flow_rules(flow_rule); + return err; + } + + if (flow_rule) { + destroy_flow_rule_vport_sq(sq); + sq->flow_rule = flow_rule; + } + + return err; } return 0; @@ -3298,7 +3376,7 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, } else { tx_port_affinity = (unsigned int)atomic_add_return( - 1, &dev->roce[port_num].tx_port_affinity) % + 1, &dev->port[port_num].roce.tx_port_affinity) % MLX5_MAX_PORTS + 1; mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", @@ -3403,7 +3481,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev); + u8 p = mlx5_core_native_port_num(dev->mdev) - 1; tx_affinity = get_tx_affinity(dev, pd, base, p, udata); context->flags |= cpu_to_be32(tx_affinity << 24); @@ -3556,6 +3634,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + if (attr_mask & IB_QP_PORT) + raw_qp_param.port = attr->port_num; + if (attr_mask & IB_QP_RATE_LIMIT) { raw_qp_param.rl.rate = attr->rate_limit; @@ -4729,16 +4810,15 @@ static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - u32 tidx = idx; int i, j; pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); pr_debug("WQBB at %p:\n", (void *)p); j = 0; + idx = (idx + 1) & (qp->sq.wqe_cnt - 1); } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), @@ -5627,8 +5707,7 @@ out: } struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; @@ -5650,7 +5729,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, return &xrcd->ibxrcd; } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; @@ -5962,19 +6041,19 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(dev, pd, rwq); + destroy_user_rq(dev, pd, rwq, udata); err: kfree(rwq); return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(dev, wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); return 0; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 1ec1beb1296b..4e7fde86c96b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -194,9 +194,15 @@ err_db: return err; } -static void 
destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &srq->db); ib_umem_release(srq->umem); } @@ -208,16 +214,16 @@ static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) mlx5_db_free(dev->mdev, &srq->db); } -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_dev *dev = to_mdev(ib_srq->device); + struct mlx5_ib_srq *srq = to_msrq(ib_srq); size_t desc_size; size_t buf_size; int err; - struct mlx5_srq_attr in = {0}; + struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -225,13 +231,9 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, max_srq_wqes); - return ERR_PTR(-EINVAL); + return -EINVAL; } - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); @@ -239,35 +241,32 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) { - err = -EINVAL; - goto err_srq; - } + if (desc_size == 0 || srq->msrq.max_gs > desc_size) + return -EINVAL; + desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { - err = -EINVAL; - goto err_srq; - } + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) + return -EINVAL; + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) { - err = -EINVAL; - goto err_srq; - } + if (buf_size < desc_size) + return -EINVAL; + in.type = init_attr->srq_type; if (udata) - err = create_srq_user(pd, srq, &in, udata, buf_size); + err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", udata ? 
"user" : "kernel", err); - goto err_srq; + return err; } in.log_size = ilog2(srq->msrq.max); @@ -297,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - in.pd = to_mpd(pd)->pdn; + in.pd = to_mpd(ib_srq->pd)->pdn; in.db_record = srq->db.dma; err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); @@ -320,21 +319,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_core: mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: if (udata) - destroy_srq_user(pd, srq); + destroy_srq_user(ib_srq->pd, srq, udata); else destroy_srq_kernel(dev, srq); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -387,7 +383,7 @@ out_box: return ret; } -int mlx5_ib_destroy_srq(struct ib_srq *srq) +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); @@ -395,14 +391,16 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { - mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { destroy_srq_kernel(dev, msrq); } - - kfree(srq); - return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index c330af35ff10..af197c36d757 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -51,15 +51,12 @@ struct mlx5_core_srq { struct mlx5_srq_table { struct notifier_block nb; - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; + struct xarray array; }; int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 63ac38bb3498..b0d0687c7a68 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -83,13 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); return srq; } @@ -597,9 +595,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, atomic_set(&srq->common.refcount, 1); init_completion(&srq->common.free); - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL)); if (err) goto err_destroy_srq_split; @@ -611,26 +607,22 @@ err_destroy_srq_split: return err; } -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev 
*dev, struct mlx5_core_srq *srq) +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); + tmp = xa_erase_irq(&table->array, srq->srqn); if (!tmp || tmp != srq) - return -EINVAL; + return; err = destroy_srq_split(dev, srq); if (err) - return err; + return; mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); - - return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, @@ -680,13 +672,11 @@ static int srq_event_notifier(struct notifier_block *nb, eqe = data; srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); if (!srq) return NOTIFY_OK; @@ -703,8 +693,7 @@ int mlx5_init_srq_table(struct mlx5_ib_dev *dev) struct mlx5_srq_table *table = &dev->srq_table; memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&table->array, XA_FLAGS_LOCK_IRQ); table->nb.notifier_call = srq_event_notifier; mlx5_notifier_register(dev->mdev, &table->nb); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 877a6daffa98..c3cfea243af8 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -77,7 +77,7 @@ struct mthca_cq_context { __be32 ci_db; /* Arbel only */ __be32 state_db; /* Arbel only */ u32 reserved; -} __attribute__((packed)); +} __packed; #define MTHCA_CQ_STATUS_OK ( 0 << 28) #define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 30400ea4808b..2cdf686203c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -63,7 +63,7 @@ struct mthca_eq_context { __be32 consumer_index; __be32 producer_index; u32 reserved3[4]; -} __attribute__((packed)); +} __packed; #define MTHCA_EQ_STATUS_OK ( 0 << 28) #define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) @@ -130,7 +130,7 @@ struct mthca_eqe { u32 raw[6]; struct { __be32 cqn; - } __attribute__((packed)) comp; + } __packed comp; struct { u16 reserved1; __be16 token; @@ -138,27 +138,27 @@ struct mthca_eqe { u8 reserved3[3]; u8 status; __be64 out_param; - } __attribute__((packed)) cmd; + } __packed cmd; struct { __be32 qpn; - } __attribute__((packed)) qp; + } __packed qp; struct { __be32 srqn; - } __attribute__((packed)) srq; + } __packed srq; struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; u8 syndrome; - } __attribute__((packed)) cq_err; + } __packed cq_err; struct { u32 reserved1[2]; __be32 port; - } __attribute__((packed)) port_change; + } __packed port_change; } event; u8 reserved3[3]; u8 owner; -} __attribute__((packed)); +} __packed; #define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) #define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 6686042aafb4..4250b2c18c64 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -60,7 +60,7 @@ struct mthca_mpt_entry { __be64 mtt_seg; __be32 mtt_sz; /* Arbel only */ u32 reserved[2]; -} __attribute__((packed)); 
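/*
 * Editorial note, not part of the patch: the srq_cmd.c hunks above replace a
 * spinlock-protected radix tree with an XArray. A minimal sketch of the same
 * lookup/insert/erase pattern, using an assumed 'struct foo_table' -- all
 * names here are illustrative; only the <linux/xarray.h> calls mirror the
 * patch.
 */
#include <linux/xarray.h>

struct foo_table {
        struct xarray array;    /* replaces spinlock + radix_tree_root */
};

static void foo_table_init(struct foo_table *t)
{
        /* entries are reached from IRQ-safe contexts, as in the SRQ table */
        xa_init_flags(&t->array, XA_FLAGS_LOCK_IRQ);
}

static int foo_insert(struct foo_table *t, u32 id, void *obj)
{
        /* xa_store_irq() encodes allocation failures in its return value */
        return xa_err(xa_store_irq(&t->array, id, obj, GFP_KERNEL));
}

static void *foo_lookup(struct foo_table *t, u32 id)
{
        void *obj;

        xa_lock(&t->array);
        obj = xa_load(&t->array, id);   /* caller takes its own reference */
        xa_unlock(&t->array);
        return obj;
}

static void *foo_remove(struct foo_table *t, u32 id)
{
        /* returns the removed entry so the caller can sanity-check it */
        return xa_erase_irq(&t->array, id);
}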
+} __packed; #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) #define MTHCA_MPT_FLAG_MIO (1 << 17) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index d063d7a37762..4f40dfedf920 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -363,18 +363,17 @@ static int mthca_mmap_uar(struct ib_ucontext *context, return 0; } -static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct mthca_pd *pd = to_mpd(ibpd); int err; - err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); + err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd); if (err) return err; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); return -EFAULT; @@ -384,114 +383,86 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mthca_dealloc_pd(struct ib_pd *pd) +static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - int err; - struct mthca_ah *ah; + struct mthca_ah *ah = to_mah(ibah); - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); - if (err) { - kfree(ah); - return ERR_PTR(err); - } - - return &ah->ibah; + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, + ah); } -static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) +static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); - kfree(ah); - - return 0; } -static struct ib_srq *mthca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +static int mthca_create_srq(struct ib_srq *ibsrq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_create_srq ucmd; struct mthca_ucontext *context = rdma_udata_to_drv_context( udata, struct mthca_ucontext, ibucontext); - struct mthca_srq *srq; + struct mthca_srq *srq = to_msrq(ibsrq); int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-EOPNOTSUPP); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EOPNOTSUPP; if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_free; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; - err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index, ucmd.db_page); if (err) - goto err_free; + return err; srq->mr.ibmr.lkey = ucmd.lkey; srq->db_index = ucmd.db_index; } - err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd), &init_attr->attr, srq, udata); if (err && udata) - mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index); if (err) - goto err_free; + return err; - if (context && 
ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { - mthca_free_srq(to_mdev(pd->device), srq); - err = -EFAULT; - goto err_free; + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + mthca_free_srq(to_mdev(ibsrq->device), srq); + return -EFAULT; } - return &srq->ibsrq; - -err_free: - kfree(srq); - - return ERR_PTR(err); + return 0; } -static int mthca_destroy_srq(struct ib_srq *srq) +static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { - struct mthca_ucontext *context; - - if (srq->uobject) { - context = to_mucontext(srq->uobject->context); + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); - kfree(srq); - - return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, @@ -607,16 +578,22 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return &qp->ibqp; } -static int mthca_destroy_qp(struct ib_qp *qp) +static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { - if (qp->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->sq.db_index); mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); @@ -626,7 +603,6 @@ static int mthca_destroy_qp(struct ib_qp *qp) static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -634,6 +610,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, struct mthca_cq *cq; int nent; int err; + struct mthca_ucontext *context = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); if (attr->flags) return ERR_PTR(-EINVAL); @@ -641,19 +619,19 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); - if (context) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.set_db_index, ucmd.set_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index, + ucmd.set_db_page); if (err) return ERR_PTR(err); - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.arm_db_index, ucmd.arm_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index, + ucmd.arm_db_page); if (err) goto err_unmap_set; } @@ -664,7 +642,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, goto err_unmap_arm; } - if (context) { + if (udata) { cq->buf.mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; @@ -673,14 +651,13 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = 
mthca_init_cq(to_mdev(ibdev), nent, - context ? to_mucontext(context) : NULL, - context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + err = mthca_init_cq(to_mdev(ibdev), nent, context, + udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) goto err_free; - if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) { mthca_free_cq(to_mdev(ibdev), cq); err = -EFAULT; goto err_free; @@ -694,14 +671,14 @@ err_free: kfree(cq); err_unmap_arm: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.arm_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index); err_unmap_set: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.set_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index); return ERR_PTR(err); } @@ -827,16 +804,22 @@ out: return ret; } -static int mthca_destroy_cq(struct ib_cq *cq) +static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - if (cq->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->arm_db_index); mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); @@ -914,7 +897,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - n = mr->umem->nmap; + n = ib_umem_num_pages(mr->umem); mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -974,7 +957,7 @@ err: return ERR_PTR(err); } -static int mthca_dereg_mr(struct ib_mr *mr) +static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata) { struct mthca_mr *mmr = to_mmr(mr); @@ -1200,6 +1183,8 @@ static const struct ib_device_ops mthca_dev_ops = { .query_qp = mthca_query_qp, .reg_user_mr = mthca_reg_user_mr, .resize_cq = mthca_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext), }; @@ -1210,6 +1195,8 @@ static const struct ib_device_ops mthca_dev_arbel_srq_ops = { .modify_srq = mthca_modify_srq, .post_srq_recv = mthca_arbel_post_srq_recv, .query_srq = mthca_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq), }; static const struct ib_device_ops mthca_dev_tavor_srq_ops = { @@ -1218,6 +1205,8 @@ static const struct ib_device_ops mthca_dev_tavor_srq_ops = { .modify_srq = mthca_modify_srq, .post_srq_recv = mthca_tavor_post_srq_recv, .query_srq = mthca_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq), }; static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index d65b189f20ea..d04c245359eb 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -115,7 +115,7 @@ struct mthca_qp_path { u8 hop_limit; __be32 sl_tclass_flowlabel; u8 rgid[16]; -} __attribute__((packed)); +} __packed; struct mthca_qp_context { __be32 
flags; @@ -154,14 +154,14 @@ struct mthca_qp_context { __be16 rq_wqe_counter; /* reserved on Tavor */ __be16 sq_wqe_counter; /* reserved on Tavor */ u32 reserved3[18]; -} __attribute__((packed)); +} __packed; struct mthca_qp_param { __be32 opt_param_mask; u32 reserved1; struct mthca_qp_context context; u32 reserved2[62]; -} __attribute__((packed)); +} __packed; enum { MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 0010a3ed64f1..62bf986eba67 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr, + NULL); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 828e4af3f951..49024326a518 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -52,7 +52,7 @@ atomic_t qps_created; atomic_t sw_qps_destroyed; static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); -static int nes_dereg_mr(struct ib_mr *ib_mr); +static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); /** * nes_alloc_mw @@ -306,9 +306,8 @@ static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, /* * nes_alloc_mr */ -static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); @@ -386,7 +385,7 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, return ibmr; err: - nes_dereg_mr(ibmr); + nes_dereg_mr(ibmr, udata); return ERR_PTR(-ENOMEM); } @@ -641,22 +640,24 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /** * nes_alloc_pd */ -static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int nes_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct ib_device *ibdev = pd->device; struct nes_pd *nespd = to_nespd(pd); struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_ucontext *nesucontext; struct nes_alloc_pd_resp uresp; u32 pd_num = 0; int err; + struct nes_ucontext *nesucontext = rdma_udata_to_drv_context( + udata, struct nes_ucontext, ibucontext); - nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", - nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, - netdev_refcnt_read(nesvnic->netdev)); + nes_debug( + NES_DBG_PD, + "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", + nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, + &nesucontext->ibucontext, netdev_refcnt_read(nesvnic->netdev)); err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); @@ -668,8 +669,7 @@ static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + 
nesadapter->base_pd; - if (context) { - nesucontext = to_nesucontext(context); + if (udata) { nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", @@ -700,7 +700,7 @@ static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, /** * nes_dealloc_pd */ -static void nes_dealloc_pd(struct ib_pd *ibpd) +static void nes_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct nes_ucontext *nesucontext; struct nes_pd *nespd = to_nespd(ibpd); @@ -708,8 +708,12 @@ static void nes_dealloc_pd(struct ib_pd *ibpd) struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; - if ((ibpd->uobject) && (ibpd->uobject->context)) { - nesucontext = to_nesucontext(ibpd->uobject->context); + if (udata) { + nesucontext = + rdma_udata_to_drv_context( + udata, + struct nes_ucontext, + ibucontext); nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n", nespd->mmap_db_index); clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); @@ -1039,53 +1043,48 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if (udata) { - nesqp->user_mode = 1; - if (virt_wqs) { - err = 1; - list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { - if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { - list_del(&nespbl->list); - err = 0; - nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", - nespbl, nespbl->user_base); - break; - } - } - if (err) { - nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", - (long long unsigned int)req.user_wqe_buffers); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); + + nesqp->user_mode = 1; + if (virt_wqs) { + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", + nespbl, nespbl->user_base); + break; } } - - nesqp->mmap_sq_db_index = - find_next_zero_bit(nes_ucontext->allocated_wqs, - NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); - /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", - nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { - nes_debug(NES_DBG_QP, - "db index > max user regions, failing create QP\n"); + if (err) { + nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. 
address = %llx.\n", + (long long unsigned int)req.user_wqe_buffers); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - if (virt_wqs) { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - } kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EFAULT); } - set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); - nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; - nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; - } else { + } + + nesqp->mmap_sq_db_index = + find_next_zero_bit(nes_ucontext->allocated_wqs, + NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); + /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", + nespd->mmap_db_index); */ + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { + nes_debug(NES_DBG_QP, + "db index > max user regions, failing create QP\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + if (virt_wqs) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + } kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); + return ERR_PTR(-ENOMEM); } + set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; } err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); @@ -1303,7 +1302,7 @@ static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq) /** * nes_destroy_qp */ -static int nes_destroy_qp(struct ib_qp *ibqp) +static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct nes_qp *nesqp = to_nesqp(ibqp); struct nes_ucontext *nes_ucontext; @@ -1343,8 +1342,12 @@ static int nes_destroy_qp(struct ib_qp *ibqp) } if (nesqp->user_mode) { - if ((ibqp->uobject)&&(ibqp->uobject->context)) { - nes_ucontext = to_nesucontext(ibqp->uobject->context); + if (udata) { + nes_ucontext = + rdma_udata_to_drv_context( + udata, + struct nes_ucontext, + ibucontext); clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL; if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) { @@ -1373,7 +1376,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp) */ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -1418,9 +1420,10 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq->hw_cq.cq_number = cq_num; nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; + if (udata) { + struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context( + udata, struct nes_ucontext, ibucontext); - if (context) { - nes_ucontext = to_nesucontext(context); if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); @@ -1487,7 +1490,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1516,7 +1519,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, if (nesadapter->free_4kpbl == 0) { 
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_free_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1538,7 +1541,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, if (nesadapter->free_256pbl == 0) { spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_free_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1564,7 +1567,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16))); - if (context) { + if (udata) { if (pbl_entries != 1) u64temp = (u64)nespbl->pbl_pbase; else @@ -1595,7 +1598,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { nes_put_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1609,7 +1612,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, } nes_put_cqp_request(nesdev, cqp_request); - if (context) { + if (udata) { /* free the nespbl */ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, nespbl->pbl_pbase); @@ -1631,7 +1634,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, /** * nes_destroy_cq */ -static int nes_destroy_cq(struct ib_cq *ib_cq) +static int nes_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct nes_cq *nescq; struct nes_device *nesdev; @@ -2382,7 +2385,7 @@ reg_user_mr_err: /** * nes_dereg_mr */ -static int nes_dereg_mr(struct ib_mr *ib_mr) +static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct nes_mr *nesmr = to_nesmr(ib_mr); struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device); @@ -3574,6 +3577,14 @@ static const struct ib_device_ops nes_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = nes_get_dma_mr, .get_port_immutable = nes_port_immutable, + .iw_accept = nes_accept, + .iw_add_ref = nes_add_ref, + .iw_connect = nes_connect, + .iw_create_listen = nes_create_listen, + .iw_destroy_listen = nes_destroy_listen, + .iw_get_qp = nes_get_qp, + .iw_reject = nes_reject, + .iw_rem_ref = nes_rem_ref, .map_mr_sg = nes_map_mr_sg, .mmap = nes_mmap, .modify_qp = nes_modify_qp, @@ -3638,23 +3649,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); - if (nesibdev->ibdev.iwcm == NULL) { - ib_dealloc_device(&nesibdev->ibdev); - return NULL; - } - nesibdev->ibdev.iwcm->add_ref = nes_add_ref; - nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref; - nesibdev->ibdev.iwcm->get_qp = nes_get_qp; - nesibdev->ibdev.iwcm->connect = nes_connect; - nesibdev->ibdev.iwcm->accept = nes_accept; - nesibdev->ibdev.iwcm->reject = nes_reject; - nesibdev->ibdev.iwcm->create_listen = nes_create_listen; - nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); - memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, - sizeof(nesibdev->ibdev.iwcm->ifname)); + memcpy(nesibdev->ibdev.iw_ifname, netdev->name, + sizeof(nesibdev->ibdev.iw_ifname)); return nesibdev; } @@ -3715,7 +3712,6 @@ void 
nes_destroy_ofa_device(struct nes_ib_device *nesibdev) nes_unregister_ofa_device(nesibdev); - kfree(nesibdev->ibdev.iwcm); ib_dealloc_device(&nesibdev->ibdev); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a7295322efbc..1d4ea135c28f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -156,37 +156,34 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata) { u32 *ahid_addr; int status; - struct ocrdma_ah *ah; + struct ocrdma_ah *ah = get_ocrdma_ah(ibah); bool isvlan = false; u16 vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + sgid_attr = attr->grh.sgid_attr; + status = rdma_read_gid_l2_fields(sgid_attr, &vlan_tag, NULL); + if (status) + return status; status = ocrdma_alloc_av(dev, ah); if (status) goto av_err; - sgid_attr = attr->grh.sgid_attr; - if (is_vlan_dev(sgid_attr->ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); - /* Get network header type for this GID */ ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); @@ -210,23 +207,20 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, OCRDMA_AH_VLAN_VALID_SHIFT); } - return &ah->ibah; + return 0; av_conf_err: ocrdma_free_av(dev, ah); av_err: - kfree(ah); - return ERR_PTR(status); + return status; } -int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) +void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); ocrdma_free_av(dev, ah); - kfree(ah); - return 0; } int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index eb996e14b520..64cb82c08664 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,9 +51,9 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 097e5ab2a19f..32674b291f60 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2496,7 +2496,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; const struct ib_gid_attr 
*sgid_attr; - u32 vlan_id = 0xFFFF; + u16 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { struct sockaddr _sockaddr; @@ -2526,8 +2526,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, sizeof(cmd->params.dgid)); sgid_attr = ah_attr->grh.sgid_attr; - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); - memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); + status = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, &mac_addr[0]); + if (status) + return status; qp->sgid_idx = grh->sgid_index; memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], @@ -2863,21 +2864,19 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) return status; } -int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) +void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) { - int status = -ENOMEM; struct ocrdma_destroy_srq *cmd; struct pci_dev *pdev = dev->nic_info.pdev; cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd)); if (!cmd) - return status; + return; cmd->id = srq->id; - status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (srq->rq.va) dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, srq->rq.pa); kfree(cmd); - return status; } static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, @@ -3067,13 +3066,12 @@ int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) return status; } -int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) +void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) { unsigned long flags; spin_lock_irqsave(&dev->av_tbl.lock, flags); ah->av->valid = 0; spin_unlock_irqrestore(&dev->av_tbl.lock, flags); - return 0; } static int ocrdma_create_eqs(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index ebc1f442aec3..06ec59326a90 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -137,10 +137,10 @@ int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *, struct ocrdma_pd *); int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *); int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *); -int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *); +void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq); -int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *); -int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *); +int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah); +void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah); int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, enum ib_qp_state *old_ib_state); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b9e10d55a58e..fc6c0962dea9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -62,8 +62,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -static DEFINE_IDR(ocrdma_dev_id); - void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) { u8 mac_addr[6]; @@ -161,7 +159,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = ocrdma_get_dma_mr, .get_link_layer = ocrdma_link_layer, - .get_netdev = ocrdma_get_netdev, .get_port_immutable = ocrdma_port_immutable, 
.map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, @@ -179,6 +176,8 @@ static const struct ib_device_ops ocrdma_dev_ops = { .reg_user_mr = ocrdma_reg_user_mr, .req_notify_cq = ocrdma_arm_cq, .resize_cq = ocrdma_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext), }; @@ -189,10 +188,14 @@ static const struct ib_device_ops ocrdma_dev_srq_ops = { .modify_srq = ocrdma_modify_srq, .post_srq_recv = ocrdma_post_srq_recv, .query_srq = ocrdma_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, ocrdma_srq, ibsrq), }; static int ocrdma_register_device(struct ocrdma_dev *dev) { + int ret; + ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, @@ -247,6 +250,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; + ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); + if (ret) + return ret; + return ib_register_device(&dev->ibdev, "ocrdma%d"); } @@ -304,15 +311,13 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) pr_err("Unable to allocate ib device\n"); return NULL; } + dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); if (!dev->mbx_cmd) - goto idr_err; + goto init_err; memcpy(&dev->nic_info, dev_info, sizeof(*dev_info)); - dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL); - if (dev->id < 0) - goto idr_err; - + dev->id = PCI_FUNC(dev->nic_info.pdev->devfn); status = ocrdma_init_hw(dev); if (status) goto init_err; @@ -349,8 +354,6 @@ alloc_err: ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); init_err: - idr_remove(&ocrdma_dev_id, dev->id); -idr_err: kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); pr_err("%s() leaving. 
ret=%d\n", __func__, status); @@ -360,7 +363,6 @@ idr_err: static void ocrdma_remove_free(struct ocrdma_dev *dev) { - idr_remove(&ocrdma_dev_id, dev->id); kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); } @@ -465,7 +467,6 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_rem_debugfs(); - idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b4e1777c2c97..35ec87015792 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -47,6 +47,7 @@ #include <rdma/ib_umem.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> +#include <rdma/uverbs_ioctl.h> #include "ocrdma.h" #include "ocrdma_hw.h" @@ -112,24 +113,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, return 0; } -struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num) -{ - struct ocrdma_dev *dev; - struct net_device *ndev = NULL; - - rcu_read_lock(); - - dev = get_ocrdma_dev(ibdev); - if (dev) - ndev = dev->nic_info.netdev; - if (ndev) - dev_hold(ndev); - - rcu_read_unlock(); - - return ndev; -} - static inline void get_link_speed_and_width(struct ocrdma_dev *dev, u8 *ib_speed, u8 *ib_width) { @@ -367,6 +350,16 @@ static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd) return status; } +/* + * NOTE: + * + * ocrdma_ucontext must be used here because this function is also + * called from ocrdma_alloc_ucontext where ib_udata does not have + * valid ib_ucontext pointer. ib_uverbs_get_context does not call + * uobj_{alloc|get_xxx} helpers which are used to store the + * ib_ucontext in uverbs_attr_bundle wrapping the ib_udata. so + * ib_udata does NOT imply valid ib_ucontext here! 
+ */ static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd, struct ocrdma_ucontext *uctx, struct ib_udata *udata) @@ -593,7 +586,6 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) { int status; @@ -601,7 +593,8 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, u64 dpp_page_addr = 0; u32 db_page_size; struct ocrdma_alloc_pd_uresp rsp; - struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; @@ -639,18 +632,17 @@ dpp_map_err: return status; } -int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; - struct ocrdma_ucontext *uctx = NULL; int status; u8 is_uctx_pd = false; + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); - if (udata && context) { - uctx = get_ocrdma_ucontext(context); + if (udata) { pd = ocrdma_get_ucontext_pd(uctx); if (pd) { is_uctx_pd = true; @@ -664,8 +656,8 @@ int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, goto exit; pd_mapping: - if (udata && context) { - status = ocrdma_copy_pd_uresp(dev, pd, context, udata); + if (udata) { + status = ocrdma_copy_pd_uresp(dev, pd, udata); if (status) goto err; } @@ -680,7 +672,7 @@ exit: return status; } -void ocrdma_dealloc_pd(struct ib_pd *ibpd) +void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); @@ -922,7 +914,7 @@ umem_err: return ERR_PTR(status); } -int ocrdma_dereg_mr(struct ib_mr *ib_mr) +int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr); struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device); @@ -946,13 +938,17 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) } static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, - struct ib_udata *udata, - struct ib_ucontext *ib_ctx) + struct ib_udata *udata) { int status; - struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); struct ocrdma_create_cq_uresp uresp; + /* this must be user flow! 
*/ + if (!udata) + return -EINVAL; + memset(&uresp, 0, sizeof(uresp)); uresp.cq_id = cq->id; uresp.page_size = PAGE_ALIGN(cq->len); @@ -983,13 +979,13 @@ err: struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) { int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); - struct ocrdma_ucontext *uctx = NULL; + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); u16 pd_id = 0; int status; struct ocrdma_create_cq_ureq ureq; @@ -1011,18 +1007,16 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - if (ib_ctx) { - uctx = get_ocrdma_ucontext(ib_ctx); + if (udata) pd_id = uctx->cntxt_pd->id; - } status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); if (status) { kfree(cq); return ERR_PTR(status); } - if (ib_ctx) { - status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx); + if (udata) { + status = ocrdma_copy_cq_uresp(dev, cq, udata); if (status) goto ctx_err; } @@ -1076,7 +1070,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -int ocrdma_destroy_cq(struct ib_cq *ibcq) +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1697,7 +1691,7 @@ void ocrdma_del_flush_qp(struct ocrdma_qp *qp) spin_unlock_irqrestore(&dev->flush_q_lock, flags); } -int ocrdma_destroy_qp(struct ib_qp *ibqp) +int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct ocrdma_pd *pd; struct ocrdma_qp *qp; @@ -1793,45 +1787,43 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, return status; } -struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - int status = -ENOMEM; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - struct ocrdma_srq *srq; + int status; + struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); + struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); if (init_attr->attr.max_sge > dev->attr.max_recv_sge) - return ERR_PTR(-EINVAL); + return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(status); + return -EINVAL; spin_lock_init(&srq->q_lock); srq->pd = pd; srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size); status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd); if (status) - goto err; + return status; - if (udata == NULL) { - status = -ENOMEM; + if (!udata) { srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64), GFP_KERNEL); - if (srq->rqe_wr_id_tbl == NULL) + if (!srq->rqe_wr_id_tbl) { + status = -ENOMEM; goto arm_err; + } srq->bit_fields_len = (srq->rq.max_cnt / 32) + (srq->rq.max_cnt % 32 ? 
1 : 0); srq->idx_bit_fields = kmalloc_array(srq->bit_fields_len, sizeof(u32), GFP_KERNEL); - if (srq->idx_bit_fields == NULL) + if (!srq->idx_bit_fields) { + status = -ENOMEM; goto arm_err; + } memset(srq->idx_bit_fields, 0xff, srq->bit_fields_len * sizeof(u32)); } @@ -1848,15 +1840,13 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, goto arm_err; } - return &srq->ibsrq; + return 0; arm_err: ocrdma_mbx_destroy_srq(dev, srq); -err: kfree(srq->rqe_wr_id_tbl); kfree(srq->idx_bit_fields); - kfree(srq); - return ERR_PTR(status); + return status; } int ocrdma_modify_srq(struct ib_srq *ibsrq, @@ -1885,15 +1875,14 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -int ocrdma_destroy_srq(struct ib_srq *ibsrq) +void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { - int status; struct ocrdma_srq *srq; struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); srq = get_ocrdma_srq(ibsrq); - status = ocrdma_mbx_destroy_srq(dev, srq); + ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, @@ -1901,8 +1890,6 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); - kfree(srq); - return status; } /* unprivileged verbs and their support functions. */ @@ -2931,9 +2918,8 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) return 0; } -struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { int status; struct ocrdma_mr *mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 4c04ab40798e..d76aae7ed0d3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -61,7 +61,6 @@ enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); -struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num); int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); @@ -69,16 +68,14 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, - struct ib_udata *udata); -void ocrdma_dealloc_pd(struct ib_pd *pd); +int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); +void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -int ocrdma_destroy_cq(struct ib_cq *); +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, @@ -90,25 +87,24 @@ int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, int ocrdma_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); -int ocrdma_destroy_qp(struct ib_qp *); +int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); void ocrdma_del_flush_qp(struct ocrdma_qp *qp); -struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *, - struct ib_udata *); 
+int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr, + struct ib_udata *udata); int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -int ocrdma_destroy_srq(struct ib_srq *); +void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); -int ocrdma_dereg_mr(struct ib_mr *); +int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); -struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 996d9ecd93e0..083c2c00a8e9 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -39,7 +39,6 @@ #include <linux/iommu.h> #include <linux/pci.h> #include <net/addrconf.h> -#include <linux/idr.h> #include <linux/qed/qed_chain.h> #include <linux/qed/qed_if.h> @@ -82,20 +81,6 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } -static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num) -{ - struct qedr_dev *qdev; - - qdev = get_qedr_dev(dev); - dev_hold(qdev->ndev); - - /* The HW vendor's device driver must guarantee - * that this function returns NULL before the net device has finished - * NETDEV_UNREGISTER state. 
- */ - return qdev->ndev; -} - static int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { @@ -163,6 +148,14 @@ static const struct attribute_group qedr_attr_group = { static const struct ib_device_ops qedr_iw_dev_ops = { .get_port_immutable = qedr_iw_port_immutable, + .iw_accept = qedr_iw_accept, + .iw_add_ref = qedr_iw_qp_add_ref, + .iw_connect = qedr_iw_connect, + .iw_create_listen = qedr_iw_create_listen, + .iw_destroy_listen = qedr_iw_destroy_listen, + .iw_get_qp = qedr_iw_get_qp, + .iw_reject = qedr_iw_reject, + .iw_rem_ref = qedr_iw_qp_rem_ref, .query_gid = qedr_iw_query_gid, }; @@ -172,21 +165,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); - dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); - if (!dev->ibdev.iwcm) - return -ENOMEM; - - dev->ibdev.iwcm->connect = qedr_iw_connect; - dev->ibdev.iwcm->accept = qedr_iw_accept; - dev->ibdev.iwcm->reject = qedr_iw_reject; - dev->ibdev.iwcm->create_listen = qedr_iw_create_listen; - dev->ibdev.iwcm->destroy_listen = qedr_iw_destroy_listen; - dev->ibdev.iwcm->add_ref = qedr_iw_qp_add_ref; - dev->ibdev.iwcm->rem_ref = qedr_iw_qp_rem_ref; - dev->ibdev.iwcm->get_qp = qedr_iw_get_qp; - - memcpy(dev->ibdev.iwcm->ifname, - dev->ndev->name, sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, + dev->ndev->name, sizeof(dev->ibdev.iw_ifname)); return 0; } @@ -220,7 +200,6 @@ static const struct ib_device_ops qedr_dev_ops = { .get_dev_fw_str = qedr_get_dev_fw_str, .get_dma_mr = qedr_get_dma_mr, .get_link_layer = qedr_link_layer, - .get_netdev = qedr_get_netdev, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, .modify_port = qedr_modify_port, @@ -239,7 +218,10 @@ static const struct ib_device_ops qedr_dev_ops = { .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, .resize_cq = qedr_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; @@ -293,6 +275,10 @@ static int qedr_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); dev->ibdev.driver_id = RDMA_DRIVER_QEDR; + rc = ib_device_set_netdev(&dev->ibdev, dev->ndev, 1); + if (rc) + return rc; + return ib_register_device(&dev->ibdev, "qedr%d"); } @@ -364,8 +350,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); if (IS_IWARP(dev)) { - spin_lock_init(&dev->qpidr.idr_lock); - idr_init(&dev->qpidr.idr); + xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } @@ -760,8 +745,8 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) break; case EVENT_TYPE_SRQ: srq_id = (u16)roce_handle64; - spin_lock_irqsave(&dev->srqidr.idr_lock, flags); - srq = idr_find(&dev->srqidr.idr, srq_id); + xa_lock_irqsave(&dev->srqs, flags); + srq = xa_load(&dev->srqs, srq_id); if (srq) { ibsrq = &srq->ibsrq; if (ibsrq->event_handler) { @@ -775,7 +760,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) "SRQ event with NULL pointer ibsrq. 
Handle=%llx\n", roce_handle64); } - spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags); + xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); default: break; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 53bbe6b4e6e6..6175d1e98717 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -33,7 +33,7 @@ #define __QEDR_H__ #include <linux/pci.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <rdma/ib_addr.h> #include <linux/qed/qed_if.h> #include <linux/qed/qed_chain.h> @@ -123,11 +123,6 @@ struct qedr_device_attr { #define QEDR_ENET_STATE_BIT (0) -struct qedr_idr { - spinlock_t idr_lock; /* Protect idr data-structure */ - struct idr idr; -}; - struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -171,8 +166,8 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - struct qedr_idr qpidr; - struct qedr_idr srqidr; + struct xarray qps; + struct xarray srqs; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 0555e5a8c9ed..22881d4442b9 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = idr_find(&dev->qpidr.idr, conn_param->qpn); + qp = xa_load(&dev->qps, conn_param->qpn); if (unlikely(!qp)) return -EINVAL; @@ -681,7 +681,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = idr_find(&dev->qpidr.idr, conn_param->qpn); + qp = xa_load(&dev->qps, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; @@ -739,9 +739,7 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - spin_lock_irq(&qp->dev->qpidr.idr_lock); - idr_remove(&qp->dev->qpidr.idr, qp->qp_id); - spin_unlock_irq(&qp->dev->qpidr.idr_lock); + xa_erase_irq(&qp->dev->qps, qp->qp_id); kfree(qp); } } @@ -750,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) { struct qedr_dev *dev = get_qedr_dev(ibdev); - return idr_find(&dev->qpidr.idr, qpn); + return xa_load(&dev->qps, qpn); } diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index e1ac2fd60bb1..f5542d703ef9 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -397,14 +397,17 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, bool has_udp = false; int i; - send_size = 0; - for (i = 0; i < swr->num_sge; ++i) - send_size += swr->sg_list[i].length; + rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL); + if (rc) + return rc; - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; + send_size = 0; + for (i = 0; i < swr->num_sge; ++i) + send_size += swr->sg_list[i].length; + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8686a98e113d..3d7bde19838e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -42,6 +42,7 @@ #include <rdma/ib_umem.h> 
#include <rdma/ib_addr.h> #include <rdma/ib_cache.h> +#include <rdma/uverbs_ioctl.h> #include <linux/qed/common_hsi.h> #include "qedr_hsi_rdma.h" @@ -436,8 +437,7 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_page_prot); } -int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct qedr_dev *dev = get_qedr_dev(ibdev); @@ -446,7 +446,7 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, int rc; DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n", - (udata && context) ? "User Lib" : "Kernel"); + udata ? "User Lib" : "Kernel"); if (!dev->rdma_ctx) { DP_ERR(dev, "invalid RDMA context\n"); @@ -459,10 +459,12 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, pd->pd_id = pd_id; - if (udata && context) { + if (udata) { struct qedr_alloc_pd_uresp uresp = { .pd_id = pd_id, }; + struct qedr_ucontext *context = rdma_udata_to_drv_context( + udata, struct qedr_ucontext, ibucontext); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) { @@ -471,14 +473,14 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return rc; } - pd->uctx = get_qedr_ucontext(context); + pd->uctx = context; pd->uctx->pd = pd; } return 0; } -void qedr_dealloc_pd(struct ib_pd *ibpd) +void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); @@ -813,9 +815,10 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) + struct ib_udata *udata) { - struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx); + struct qedr_ucontext *ctx = rdma_udata_to_drv_context( + udata, struct qedr_ucontext, ibucontext); struct qed_rdma_destroy_cq_out_params destroy_oparams; struct qed_rdma_destroy_cq_in_params destroy_iparams; struct qedr_dev *dev = get_qedr_dev(ibdev); @@ -903,7 +906,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); - if (ib_ctx) { + if (udata) { rc = qedr_copy_cq_uresp(dev, cq, udata); if (rc) goto err3; @@ -959,7 +962,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -int qedr_destroy_cq(struct ib_cq *ibcq) +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; @@ -983,7 +986,7 @@ int qedr_destroy_cq(struct ib_cq *ibcq) dev->ops->common->chain_free(dev->cdev, &cq->pbl); - if (ibcq->uobject && ibcq->uobject->context) { + if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); } @@ -1044,10 +1047,13 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, enum rdma_network_type nw_type; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); u32 ipv4_addr; + int ret; int i; gid_attr = grh->sgid_attr; - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); + ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL); + if (ret) + return ret; nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { @@ -1261,7 +1267,7 @@ static void qedr_set_roce_db_info(struct 
qedr_dev *dev, struct qedr_qp *qp) } } -static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, +static int qedr_check_srq_params(struct qedr_dev *dev, struct ib_srq_init_attr *attrs, struct ib_udata *udata) { @@ -1377,38 +1383,28 @@ err0: return rc; } -static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, - void *ptr, u32 id); -static void qedr_idr_remove(struct qedr_dev *dev, - struct qedr_idr *qidr, u32 id); - -struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params destroy_in_params; struct qed_rdma_create_srq_in_params in_params = {}; - struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); struct qed_rdma_create_srq_out_params out_params; - struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_pd *pd = get_qedr_pd(ibsrq->pd); struct qedr_create_srq_ureq ureq = {}; u64 pbl_base_addr, phy_prod_pair_addr; struct qedr_srq_hwq_info *hw_srq; u32 page_cnt, page_size; - struct qedr_srq *srq; + struct qedr_srq *srq = get_qedr_srq(ibsrq); int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, "create SRQ called from %s (pd %p)\n", (udata) ? "User lib" : "kernel", pd); - rc = qedr_check_srq_params(ibpd, dev, init_attr, udata); + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; srq->dev = dev; hw_srq = &srq->hw_srq; @@ -1464,13 +1460,13 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, goto err2; } - rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); + rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL); if (rc) goto err2; DP_DEBUG(dev, QEDR_MSG_SRQ, "create srq: created srq with srq_id=0x%0x\n", srq->srq_id); - return &srq->ibsrq; + return 0; err2: destroy_in_params.srq_id = srq->srq_id; @@ -1482,18 +1478,16 @@ err1: else qedr_free_srq_kernel_params(srq); err0: - kfree(srq); - - return ERR_PTR(-EFAULT); + return -EFAULT; } -int qedr_destroy_srq(struct ib_srq *ibsrq) +void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); struct qedr_srq *srq = get_qedr_srq(ibsrq); - qedr_idr_remove(dev, &dev->srqidr, srq->srq_id); + xa_erase_irq(&dev->srqs, srq->srq_id); in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); @@ -1505,9 +1499,6 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) DP_DEBUG(dev, QEDR_MSG_SRQ, "destroy srq: destroyed srq with srq_id=0x%0x\n", srq->srq_id); - kfree(srq); - - return 0; } int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -1593,29 +1584,6 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } -static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, - void *ptr, u32 id) -{ - int rc; - - idr_preload(GFP_KERNEL); - spin_lock_irq(&qidr->idr_lock); - - rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC); - - spin_unlock_irq(&qidr->idr_lock); - idr_preload_end(); - - return rc < 0 ? 
rc : 0; -} - -static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id) -{ - spin_lock_irq(&qidr->idr_lock); - idr_remove(&qidr->idr, id); - spin_unlock_irq(&qidr->idr_lock); -} - static inline void qedr_iwarp_populate_user_qp(struct qedr_dev *dev, struct qedr_qp *qp, @@ -1985,7 +1953,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id); + rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2493,7 +2461,8 @@ err: return rc; } -static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_udata *udata) { int rc = 0; @@ -2503,7 +2472,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) return rc; } - if (qp->ibqp.uobject && qp->ibqp.uobject->context) + if (udata) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2511,7 +2480,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) return 0; } -int qedr_destroy_qp(struct ib_qp *ibqp) +int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qedr_qp *qp = get_qedr_qp(ibqp); struct qedr_dev *dev = qp->dev; @@ -2555,37 +2524,31 @@ int qedr_destroy_qp(struct ib_qp *ibqp) if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); - qedr_free_qp_resources(dev, qp); + qedr_free_qp_resources(dev, qp, udata); if (atomic_dec_and_test(&qp->refcnt) && rdma_protocol_iwarp(&dev->ibdev, 1)) { - qedr_idr_remove(dev, &dev->qpidr, qp->qp_id); + xa_erase_irq(&dev->qps, qp->qp_id); kfree(qp); } return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata) { - struct qedr_ah *ah; - - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + struct qedr_ah *ah = get_qedr_ah(ibah); rdma_copy_ah_attr(&ah->attr, attr); - return &ah->ibah; + return 0; } -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) +void qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); rdma_destroy_ah_attr(&ah->attr); - kfree(ah); - return 0; } static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) @@ -2734,7 +2697,7 @@ err0: return ERR_PTR(rc); } -int qedr_dereg_mr(struct ib_mr *ib_mr) +int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct qedr_mr *mr = get_qedr_mr(ib_mr); struct qedr_dev *dev = get_qedr_dev(ib_mr->device); @@ -2826,8 +2789,8 @@ err0: return ERR_PTR(rc); } -struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, u32 max_num_sg) +struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct qedr_mr *mr; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index f0c05f4771ac..9328c80375ef 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,16 +47,14 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, - struct ib_udata *udata); -void qedr_dealloc_pd(struct ib_pd *pd); +int qedr_alloc_pd(struct ib_pd *pd, 
struct ib_udata *udata); +void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -int qedr_destroy_cq(struct ib_cq *); +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); @@ -64,22 +62,21 @@ int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); -int qedr_destroy_qp(struct ib_qp *ibqp); +int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); -struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *attr, - struct ib_udata *udata); +int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr, + struct ib_udata *udata); int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int qedr_destroy_srq(struct ib_srq *ibsrq); +void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata); +void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); -int qedr_dereg_mr(struct ib_mr *); +int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length, @@ -89,7 +86,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, const struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 83d2349188db..432d6d0fd7f4 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <linux/xarray.h> #include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> @@ -1105,8 +1106,7 @@ struct qib_filedata { int rec_cpu_num; /* for cpu affinity; -1 if none */ }; -extern struct list_head qib_dev_list; -extern spinlock_t qib_devs_lock; +extern struct xarray qib_dev_table; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index a4a1f56ce824..f91f23e02283 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -57,7 +57,7 @@ * QIB_VERBOSE_TRACING define as 1 if you want additional tracing in * fastpath code * QIB_TRACE_REGWRITES 
define as 1 if you want register writes to be - * traced in faspath code + * traced in fastpath code * _QIB_TRACING define as 0 if you want to remove all tracing in a * compilation unit */ diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 3117cc5f2a9a..92eeea5679e2 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -49,8 +49,6 @@ */ const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(qib_devs_lock); -LIST_HEAD(qib_dev_list); DEFINE_MUTEX(qib_mutex); /* general driver use */ unsigned qib_ibmtu; @@ -96,11 +94,11 @@ int qib_count_active_units(void) { struct qib_devdata *dd; struct qib_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&qib_devs_lock, flags); - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { if (!(dd->flags & QIB_PRESENT) || !dd->kregbase) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -112,7 +110,7 @@ int qib_count_active_units(void) } } } - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); return nunits_active; } @@ -125,13 +123,12 @@ int qib_count_units(int *npresentp, int *nupp) { int nunits = 0, npresent = 0, nup = 0; struct qib_devdata *dd; - unsigned long flags; + unsigned long index, flags; int pidx; struct qib_pportdata *ppd; - spin_lock_irqsave(&qib_devs_lock, flags); - - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { nunits++; if ((dd->flags & QIB_PRESENT) && dd->kregbase) npresent++; @@ -142,8 +139,7 @@ int qib_count_units(int *npresentp, int *nupp) nup++; } } - - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); if (npresentp) *npresentp = npresent; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 1d940a2885c9..ceb42d948412 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -508,8 +508,8 @@ bail: */ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) { - struct qib_devdata *dd, *tmp; - unsigned long flags; + struct qib_devdata *dd; + unsigned long index; int ret; static const struct tree_descr files[] = { @@ -524,18 +524,12 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) goto bail; } - spin_lock_irqsave(&qib_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &qib_dev_list, list) { - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_for_each(&qib_dev_table, index, dd) { ret = add_cntr_files(sb, dd); if (ret) goto bail; - spin_lock_irqsave(&qib_devs_lock, flags); } - spin_unlock_irqrestore(&qib_devs_lock, flags); - bail: return ret; } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ac6a84f11ad0..dd4843379f51 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6137,7 +6137,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) static int setup_txselect(const char *str, const struct kernel_param *kp) { struct qib_devdata *dd; - unsigned long val; + unsigned long index, val; char *n; if (strlen(str) >= ARRAY_SIZE(txselect_list)) { @@ -6153,7 +6153,7 @@ static int setup_txselect(const char *str, const struct kernel_param *kp) } strncpy(txselect_list, str, 
ARRAY_SIZE(txselect_list) - 1); - list_for_each_entry(dd, &qib_dev_list, list) + xa_for_each(&qib_dev_table, index, dd) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) set_no_qsfp_atten(dd, 1); return 0; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 9fd69903ca57..d4fd8a6cff7b 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -36,7 +36,6 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/delay.h> -#include <linux/idr.h> #include <linux/module.h> #include <linux/printk.h> #ifdef CONFIG_INFINIBAND_QIB_DCA @@ -95,7 +94,7 @@ MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disable static void verify_interrupt(struct timer_list *); -static struct idr qib_unit_table; +DEFINE_XARRAY_FLAGS(qib_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); u32 qib_cpulist_count; unsigned long *qib_cpulist; @@ -785,21 +784,9 @@ void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd) { } -static inline struct qib_devdata *__qib_lookup(int unit) -{ - return idr_find(&qib_unit_table, unit); -} - struct qib_devdata *qib_lookup(int unit) { - struct qib_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&qib_devs_lock, flags); - dd = __qib_lookup(unit); - spin_unlock_irqrestore(&qib_devs_lock, flags); - - return dd; + return xa_load(&qib_dev_table, unit); } /* @@ -1046,10 +1033,9 @@ void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; - spin_lock_irqsave(&qib_devs_lock, flags); - idr_remove(&qib_unit_table, dd->unit); - list_del(&dd->list); - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_lock_irqsave(&qib_dev_table, flags); + __xa_erase(&qib_dev_table, dd->unit); + xa_unlock_irqrestore(&qib_dev_table, flags); #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_exit(&dd->verbs_dev); @@ -1070,15 +1056,15 @@ u64 qib_int_counter(struct qib_devdata *dd) u64 qib_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct qib_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&qib_devs_lock, flags); - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { sps_ints += qib_int_counter(dd); } - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); return sps_ints; } @@ -1087,12 +1073,9 @@ u64 qib_sps_ints(void) * allocator, because the verbs cleanup process both does cleanup and * free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct qib_devdata *dd; int ret, nports; @@ -1103,20 +1086,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (!dd) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&dd->list); - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&qib_devs_lock, flags); - - ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &qib_dev_list); - } - - spin_unlock_irqrestore(&qib_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&qib_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1255,8 +1226,6 @@ static int __init qib_ib_init(void) * These must be called before the driver is registered with * the PCI subsystem. 
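The qib hunks here drop the qib_dev_list/qib_devs_lock pair and the qib_unit_table IDR in favour of a single XArray. A minimal, self-contained sketch of that conversion pattern follows; it is not part of the patch, and the demo_* names are invented for illustration:

#include <linux/xarray.h>

struct demo_dev {
        u32 unit;                       /* index assigned by the XArray */
        /* ... driver state ... */
};

/* One XArray replaces the list, the spinlock and the IDR. */
static DEFINE_XARRAY_FLAGS(demo_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);

static int demo_register(struct demo_dev *dd)
{
        /* Allocate a free index and store dd under it in one step. */
        return xa_alloc_irq(&demo_table, &dd->unit, dd, xa_limit_32b,
                            GFP_KERNEL);
}

static struct demo_dev *demo_lookup(u32 unit)
{
        return xa_load(&demo_table, unit);      /* no external lock needed */
}

static int demo_count_units(void)
{
        struct demo_dev *dd;
        unsigned long index, flags;
        int n = 0;

        xa_lock_irqsave(&demo_table, flags);
        xa_for_each(&demo_table, index, dd)
                n++;
        xa_unlock_irqrestore(&demo_table, flags);
        return n;
}

static void demo_unregister(struct demo_dev *dd)
{
        xa_erase_irq(&demo_table, dd->unit);
}

The same shape appears earlier for qedr's QP table (xa_insert_irq/xa_erase_irq keyed by qp_id) and removes the idr_preload()/GFP_NOWAIT dance entirely.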
*/ - idr_init(&qib_unit_table); - #ifdef CONFIG_INFINIBAND_QIB_DCA dca_register_notify(&dca_notifier); #endif @@ -1281,7 +1250,6 @@ bail_dev: #ifdef CONFIG_DEBUG_FS qib_dbg_exit(); #endif - idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; @@ -1313,7 +1281,7 @@ static void __exit qib_ib_cleanup(void) qib_cpulist_count = 0; kfree(qib_cpulist); - idr_destroy(&qib_unit_table); + WARN_ON(!xa_empty(&qib_dev_table)); qib_dev_cleanup(); } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 50dd9811b088..2ac4c67f5ba1 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, ib_qib_wc_opcode[wqe->wr.opcode], @@ -975,7 +975,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 31c523b2a9f5..ef19d39a44b1 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -225,8 +225,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) if (sdma_rb_node) { sdma_rb_node->refcount++; } else { - int ret; - sdma_rb_node = kmalloc(sizeof( struct qib_user_sdma_rb_node), GFP_KERNEL); if (!sdma_rb_node) @@ -235,8 +233,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) sdma_rb_node->refcount = 1; sdma_rb_node->pid = current->pid; - ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, - sdma_rb_node); + qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); } pq->sdma_rb_node = sdma_rb_node; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a4426c24b0d1..17bdf8acee2f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -46,7 +46,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_hdrs.h> -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> #include <rdma/rdmavt_cq.h> struct qib_ctxtdata; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index bd4521b2cc5f..e9352750e029 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -447,8 +447,7 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return 0; } -int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); void *umem_pd; @@ -461,7 +460,7 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -void usnic_ib_dealloc_pd(struct ib_pd *pd) +void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); } @@ -539,7 +538,7 @@ out_release_mutex: return ERR_PTR(err); } -int usnic_ib_destroy_qp(struct ib_qp *qp) +int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct usnic_ib_qp_grp *qp_grp; struct usnic_ib_vf *vf; @@ -590,7 +589,6 @@ out_unlock: struct ib_cq 
*usnic_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct ib_cq *cq; @@ -606,7 +604,7 @@ struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, return cq; } -int usnic_ib_destroy_cq(struct ib_cq *cq) +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { usnic_dbg("\n"); kfree(cq); @@ -642,13 +640,13 @@ err_free: return ERR_PTR(err); } -int usnic_ib_dereg_mr(struct ib_mr *ibmr) +int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct usnic_ib_mr *mr = to_umr(ibmr); usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); + usnic_uiom_reg_release(mr->umem); kfree(mr); return 0; } @@ -731,4 +729,3 @@ int usnic_ib_mmap(struct ib_ucontext *context, return -EINVAL; } -/* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index c40e89b6246f..028f322f8e9b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -50,24 +50,22 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); -int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata); -void usnic_ib_dealloc_pd(struct ib_pd *pd); +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -int usnic_ib_destroy_qp(struct ib_qp *qp); +int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int usnic_ib_destroy_cq(struct ib_cq *cq); +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int usnic_ib_dereg_mr(struct ib_mr *ibmr); +int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 06862a6af185..da35d6fdfc5e 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -432,8 +432,7 @@ static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr) return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, - struct ib_ucontext *context) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr) { __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index c88cfa087e3a..70be49b1ca05 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -90,7 +90,6 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg 
*usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, - struct ib_ucontext *ucontext); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr); int usnic_uiom_init(char *drv_name); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 104c7db4704f..d7deb19a2800 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -49,6 +49,7 @@ #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/uverbs_ioctl.h> #include "pvrdma.h" @@ -93,7 +94,6 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, * pvrdma_create_cq - create completion queue * @ibdev: the device * @attr: completion queue attributes - * @context: user context * @udata: user data * * @return: ib_cq completion queue pointer on success, @@ -101,7 +101,6 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, */ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -116,6 +115,8 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; struct pvrdma_create_cq_resp cq_resp = {0}; struct pvrdma_create_cq ucmd; + struct pvrdma_ucontext *context = rdma_udata_to_drv_context( + udata, struct pvrdma_ucontext, ibucontext); BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); @@ -133,7 +134,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, } cq->ibcq.cqe = entries; - cq->is_kernel = !context; + cq->is_kernel = !udata; if (!cq->is_kernel) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { @@ -185,8 +186,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ; cmd->nchunks = npages; - cmd->ctx_handle = (context) ? - (u64)to_vucontext(context)->ctx_handle : 0; + cmd->ctx_handle = context ? context->ctx_handle : 0; cmd->cqe = entries; cmd->pdir_dma = cq->pdir.dir_dma; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP); @@ -204,13 +204,13 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); if (!cq->is_kernel) { - cq->uar = &(to_vucontext(context)->uar); + cq->uar = &context->uar; /* Copy udata back. */ if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); - pvrdma_destroy_cq(&cq->ibcq); + pvrdma_destroy_cq(&cq->ibcq, udata); return ERR_PTR(-EINVAL); } } @@ -245,10 +245,11 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) /** * pvrdma_destroy_cq - destroy completion queue * @cq: the completion queue to destroy. + * @udata: user data or null for kernel object * * @return: 0 for success. 
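A recurring change in these hunks: the explicit ib_ucontext argument goes away and the presence of udata is what distinguishes a user-verbs caller from a kernel ULP, both at create time (cq->is_kernel = !udata) and in the destroy/dereg callbacks, which now receive udata as well. Schematically, with invented demo_* names and placeholder cleanup helpers:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct demo_cq {
        struct ib_cq ibcq;
        bool is_kernel;
        /* rings, umem, mmap info, ... */
};

static void demo_cleanup_user(struct demo_cq *cq)   { /* release umem, mmap info */ }
static void demo_cleanup_kernel(struct demo_cq *cq) { /* free kernel buffers */ }

/* A NULL udata identifies an object owned by a kernel consumer. */
static int demo_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
        struct demo_cq *cq = container_of(ibcq, struct demo_cq, ibcq);

        if (udata)
                demo_cleanup_user(cq);
        else
                demo_cleanup_kernel(cq);

        kfree(cq);      /* CQs are still driver-allocated at this point */
        return 0;
}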
*/ -int pvrdma_destroy_cq(struct ib_cq *cq) +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index ec41400fec0c..40182297f87f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -143,24 +143,6 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, - u8 port_num) -{ - struct net_device *netdev; - struct pvrdma_dev *dev = to_vdev(ibdev); - - if (port_num != 1) - return NULL; - - rcu_read_lock(); - netdev = dev->netdev; - if (netdev) - dev_hold(netdev); - rcu_read_unlock(); - - return netdev; -} - static const struct ib_device_ops pvrdma_dev_ops = { .add_gid = pvrdma_add_gid, .alloc_mr = pvrdma_alloc_mr, @@ -179,7 +161,6 @@ static const struct ib_device_ops pvrdma_dev_ops = { .get_dev_fw_str = pvrdma_get_fw_ver_str, .get_dma_mr = pvrdma_get_dma_mr, .get_link_layer = pvrdma_port_link_layer, - .get_netdev = pvrdma_get_netdev, .get_port_immutable = pvrdma_port_immutable, .map_mr_sg = pvrdma_map_mr_sg, .mmap = pvrdma_mmap, @@ -195,6 +176,8 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), }; @@ -204,6 +187,8 @@ static const struct ib_device_ops pvrdma_dev_srq_ops = { .destroy_srq = pvrdma_destroy_srq, .modify_srq = pvrdma_modify_srq, .query_srq = pvrdma_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq), }; static int pvrdma_register_device(struct pvrdma_dev *dev) @@ -277,6 +262,9 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) goto err_qp_free; } dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; + ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); + if (ret) + return ret; spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); @@ -720,6 +708,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; case NETDEV_UNREGISTER: + ib_device_set_netdev(&dev->ib_dev, NULL, 1); dev_put(dev->netdev); dev->netdev = NULL; break; @@ -731,6 +720,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == ndev)) { /* this is our netdev */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); dev->netdev = ndev; dev_hold(ndev); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index a85884e90e84..65dc47ffb8f3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_mr *cmd = &req.create_mr; struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; - int ret; + int ret, npages; if (length == 0 || length > dev->dsr->caps.max_mr_size) { dev_warn(&dev->pdev->dev, "invalid mem region length\n"); @@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_CAST(umem); } - if (umem->npages < 0 || umem->npages > 
PVRDMA_PAGE_DIR_MAX_PAGES) { + npages = ib_umem_num_pages(umem); + if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", - umem->npages); + npages); ret = -EINVAL; goto err_umem; } @@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mmr.size = length; mr->umem = umem; - ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false); + ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false); if (ret) { dev_warn(&dev->pdev->dev, "could not allocate page directory\n"); @@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, cmd->length = length; cmd->pd_handle = to_vpd(pd)->pd_handle; cmd->access_flags = access_flags; - cmd->nchunks = umem->npages; + cmd->nchunks = npages; cmd->pdir_dma = mr->pdir.dir_dma; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); @@ -201,7 +202,7 @@ err_umem: * @return: ib_mr pointer on success, otherwise returns an errno. */ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_user_mr *mr; @@ -272,7 +273,7 @@ freemr: * * @return: 0 on success. */ -int pvrdma_dereg_mr(struct ib_mr *ibmr) +int pvrdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct pvrdma_user_mr *mr = to_vmr(ibmr); struct pvrdma_dev *dev = to_vdev(ibmr->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 08f4257169bd..0eaaead5baec 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -446,10 +446,11 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) /** * pvrdma_destroy_qp - destroy a queue pair * @qp: the queue pair to destroy + * @udata: user data or null for kernel object * * @return: 0 on success. */ -int pvrdma_destroy_qp(struct ib_qp *qp) +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 951d9d68107a..6cac0c88cf39 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -94,19 +94,18 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) * @init_attr: shared receive queue attributes * @udata: user data * - * @return: the ib_srq pointer on success, otherwise returns an errno. + * @return: 0 on success, otherwise returns an errno. */ -struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct pvrdma_srq *srq = NULL; - struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_srq *srq = to_vsrq(ibsrq); + struct pvrdma_dev *dev = to_vdev(ibsrq->device); union pvrdma_cmd_req req; union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_srq *cmd = &req.create_srq; struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; - struct pvrdma_create_srq_resp srq_resp = {0}; + struct pvrdma_create_srq_resp srq_resp = {}; struct pvrdma_create_srq ucmd; unsigned long flags; int ret; @@ -115,31 +114,25 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, /* No support for kernel clients. 
*/ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->srq_type != IB_SRQT_BASIC) { dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || init_attr->attr.max_sge > dev->dsr->caps.max_srq_sge) { dev_warn(&dev->pdev->dev, "shared receive queue size invalid\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (!atomic_add_unless(&dev->num_srqs, 1, dev->dsr->caps.max_srq)) - return ERR_PTR(-ENOMEM); - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = -ENOMEM; - goto err_srq; - } + return -ENOMEM; spin_lock_init(&srq->lock); refcount_set(&srq->refcnt, 1); @@ -181,7 +174,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, cmd->hdr.cmd = PVRDMA_CMD_CREATE_SRQ; cmd->srq_type = init_attr->srq_type; cmd->nchunks = srq->npages; - cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->pd_handle = to_vpd(ibsrq->pd)->pd_handle; cmd->attrs.max_wr = init_attr->attr.max_wr; cmd->attrs.max_sge = init_attr->attr.max_sge; cmd->attrs.srq_limit = init_attr->attr.srq_limit; @@ -204,21 +197,20 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, /* Copy udata back. */ if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); - pvrdma_destroy_srq(&srq->ibsrq); - return ERR_PTR(-EINVAL); + pvrdma_destroy_srq(&srq->ibsrq, udata); + return -EINVAL; } - return &srq->ibsrq; + return 0; err_page_dir: pvrdma_page_dir_cleanup(dev, &srq->pdir); err_umem: ib_umem_release(srq->umem); err_srq: - kfree(srq); atomic_dec(&dev->num_srqs); - return ERR_PTR(ret); + return ret; } static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) @@ -246,10 +238,11 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) /** * pvrdma_destroy_srq - destroy shared receive queue * @srq: the shared receive queue to destroy + * @udata: user data or null for kernel object * * @return: 0 for success. */ -int pvrdma_destroy_srq(struct ib_srq *srq) +void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; @@ -268,8 +261,6 @@ int pvrdma_destroy_srq(struct ib_srq *srq) ret); pvrdma_free_srq(dev, vsrq); - - return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 42fe821f8d58..faf7ecd7b3fa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -50,6 +50,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/vmw_pvrdma-abi.h> +#include <rdma/uverbs_ioctl.h> #include "pvrdma.h" @@ -70,8 +71,6 @@ int pvrdma_query_device(struct ib_device *ibdev, if (uhw->inlen || uhw->outlen) return -EINVAL; - memset(props, 0, sizeof(*props)); - props->fw_ver = dev->dsr->caps.fw_ver; props->sys_image_guid = dev->dsr->caps.sys_image_guid; props->max_mr_size = dev->dsr->caps.max_mr_size; @@ -421,13 +420,11 @@ int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) /** * pvrdma_alloc_pd - allocate protection domain * @ibpd: PD pointer - * @context: user context * @udata: user data * * @return: the ib_pd protection domain pointer on success, otherwise errno. 
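pvrdma_create_srq() above is typical of the SRQ conversion: the core now allocates the driver's SRQ structure (sized via the INIT_RDMA_OBJ_SIZE entry added in the main.c hunk earlier), so create receives a pre-allocated ib_srq, returns an errno instead of an ERR_PTR, and the error/destroy paths stop freeing the object. A rough sketch with invented demo_* names:

#include <linux/spinlock.h>
#include <rdma/ib_verbs.h>

struct demo_srq {
        struct ib_srq ibsrq;    /* embedded; ib_core allocates demo_srq */
        spinlock_t lock;
        /* rings, page directory, ... */
};

static int demo_create_srq(struct ib_srq *ibsrq,
                           struct ib_srq_init_attr *init_attr,
                           struct ib_udata *udata)
{
        struct demo_srq *srq = container_of(ibsrq, struct demo_srq, ibsrq);

        if (init_attr->srq_type != IB_SRQT_BASIC)
                return -EOPNOTSUPP;             /* errno, not ERR_PTR() */

        spin_lock_init(&srq->lock);
        /* build queues, post the device command, copy udata back ... */
        return 0;                               /* 0, not &srq->ibsrq */
}

static void demo_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
        /* tear down HW/queue state only; no kfree() - the core frees it */
}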
*/ -int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct pvrdma_pd *pd = to_vpd(ibpd); @@ -438,13 +435,15 @@ int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; + struct pvrdma_ucontext *context = rdma_udata_to_drv_context( + udata, struct pvrdma_ucontext, ibucontext); /* Check allowed max pds */ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) return -ENOMEM; cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; - cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; + cmd->ctx_handle = context ? context->ctx_handle : 0; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); if (ret < 0) { dev_warn(&dev->pdev->dev, @@ -453,16 +452,16 @@ int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, goto err; } - pd->privileged = !context; + pd->privileged = !udata; pd->pd_handle = resp->pd_handle; pd->pdn = resp->pd_handle; pd_resp.pdn = resp->pd_handle; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); - pvrdma_dealloc_pd(&pd->ibpd); + pvrdma_dealloc_pd(&pd->ibpd, udata); return -EFAULT; } } @@ -478,10 +477,11 @@ err: /** * pvrdma_dealloc_pd - deallocate protection domain * @pd: the protection domain to be released + * @udata: user data or null for kernel object * * @return: 0 on success, otherwise errno. */ -void pvrdma_dealloc_pd(struct ib_pd *pd) +void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); union pvrdma_cmd_req req = {}; @@ -507,34 +507,28 @@ void pvrdma_dealloc_pd(struct ib_pd *pd) * @udata: user data blob * @flags: create address handle flags (see enum rdma_create_ah_flags) * - * @return: the ib_ah pointer on success, otherwise errno. + * @return: 0 on success, otherwise errno. 
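Where the removed context argument was actually used, the driver now recovers its own ucontext from udata with rdma_udata_to_drv_context(), which evaluates to NULL for kernel callers, so the existing "context ? ... : 0" checks keep working. Condensed from the pvrdma_alloc_pd hunk above into a hypothetical, self-contained helper (demo_* names are illustrative):

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct demo_ucontext {
        struct ib_ucontext ibucontext;
        u32 ctx_handle;
};

static u32 demo_ctx_handle(struct ib_udata *udata)
{
        struct demo_ucontext *uctx = rdma_udata_to_drv_context(
                udata, struct demo_ucontext, ibucontext);

        /* rdma_udata_to_drv_context() yields NULL for kernel callers */
        return uctx ? uctx->ctx_handle : 0;
}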
*/ -struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct pvrdma_dev *dev = to_vdev(pd->device); - struct pvrdma_ah *ah; + struct pvrdma_dev *dev = to_vdev(ibah->device); + struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; u8 port_num = rdma_ah_get_port_num(ah_attr); if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; grh = rdma_ah_read_grh(ah_attr); if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) || rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) - return ERR_PTR(-ENOMEM); - - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) { - atomic_dec(&dev->num_ahs); - return ERR_PTR(-ENOMEM); - } + return -ENOMEM; - ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24); + ah->av.port_pd = to_vpd(ibah->pd)->pd_handle | (port_num << 24); ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr); ah->av.src_path_bits |= 0x80; ah->av.gid_index = grh->sgid_index; @@ -544,11 +538,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, memcpy(ah->av.dgid, grh->dgid.raw, 16); memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN); - ah->ibah.device = pd->device; - ah->ibah.pd = pd; - ah->ibah.uobject = NULL; - - return &ah->ibah; + return 0; } /** @@ -556,14 +546,10 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, * @ah: the address handle to destroyed * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * - * @return: 0 on success. */ -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) +void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); - kfree(to_vah(ah)); atomic_dec(&dev->num_ahs); - - return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 607aa131d67c..9d7b021e1c59 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -398,36 +398,33 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); -int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); -void pvrdma_dealloc_pd(struct ib_pd *ibpd); +int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); +void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int pvrdma_dereg_mr(struct ib_mr *mr); +int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int pvrdma_destroy_cq(struct ib_cq *cq); +int 
pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int pvrdma_destroy_srq(struct ib_srq *srq); +void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -436,7 +433,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int pvrdma_destroy_qp(struct ib_qp *qp); +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fc10e4e26ca7..0e147b32cbe9 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -89,36 +89,29 @@ EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle - * @pd: the protection domain + * @ibah: the IB address handle * @ah_attr: the attributes of the AH * @create_flags: create address handle flags (see enum rdma_create_ah_flags) * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. 
* - * Return: newly allocated ah + * Return: 0 on success */ -struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 create_flags, - struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata) { - struct rvt_ah *ah; - struct rvt_dev_info *dev = ib_to_rvt(pd->device); + struct rvt_ah *ah = ibah_to_rvtah(ibah); + struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(pd->device, ah_attr)) - return ERR_PTR(-EINVAL); - - ah = kmalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + if (rvt_check_ah(ibah->device, ah_attr)) + return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); if (dev->n_ahs_allocated == dev->dparms.props.max_ah) { spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } dev->n_ahs_allocated++; @@ -129,35 +122,32 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, atomic_set(&ah->refcount, 0); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(pd->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); - return &ah->ibah; + return 0; } /** * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) + * @udata: user data or NULL for kernel object * * Return: 0 on success */ -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) +void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); unsigned long flags; - if (atomic_read(&ah->refcount) != 0) - return -EBUSY; + WARN_ON_ONCE(atomic_read(&ah->refcount)); spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); rdma_destroy_ah_attr(&ah->attr); - kfree(ah); - - return 0; } /** diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 72431a618d5d..bbb4d3bdec4e 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -50,11 +50,9 @@ #include <rdma/rdma_vt.h> -struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 create_flags, - struct ib_udata *udata); -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata); +void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 4f1544ad4aff..a06e6da7a026 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -168,7 +168,6 @@ static void send_complete(struct work_struct *work) * rvt_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to * @attr: creation attributes - * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * * Called by ib_create_cq() in the generic verbs code. 
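What makes the rvt_create_ah()/rvt_destroy_ah() signature change possible is the INIT_RDMA_OBJ_SIZE() entry the series adds to each driver's ib_device_ops (visible for pvrdma earlier and for rdmavt in the vt.c hunk below): the core learns the size of the driver structure and where the ib_ah sits inside it, allocates it before calling create, and frees it after destroy. A schematic ops table, invented demo_* names only:

#include <rdma/ib_verbs.h>

struct demo_ah {
        struct ib_ah ibah;              /* embedded core object */
        struct rdma_ah_attr attr;       /* driver state follows */
};

static int demo_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
                          u32 create_flags, struct ib_udata *udata)
{
        struct demo_ah *ah = container_of(ibah, struct demo_ah, ibah);

        rdma_copy_ah_attr(&ah->attr, ah_attr);  /* no kzalloc(GFP_ATOMIC) */
        return 0;
}

static void demo_destroy_ah(struct ib_ah *ibah, u32 flags)
{
        struct demo_ah *ah = container_of(ibah, struct demo_ah, ibah);

        rdma_destroy_ah_attr(&ah->attr);        /* no kfree(); core frees ah */
}

static const struct ib_device_ops demo_dev_ops = {
        .create_ah = demo_create_ah,
        .destroy_ah = demo_destroy_ah,
        /* size + offset of ibah, so ib_core can allocate struct demo_ah */
        INIT_RDMA_OBJ_SIZE(ib_ah, demo_ah, ibah),
};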
@@ -178,7 +177,6 @@ static void send_complete(struct work_struct *work) */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); @@ -232,7 +230,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, if (udata && udata->outlen >= sizeof(__u64)) { int err; - cq->ip = rvt_create_mmap_info(rdi, sz, context, wc); + cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; @@ -299,12 +297,13 @@ done: /** * rvt_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. + * @udata: user data or NULL for kernel object * * Called by ib_destroy_cq() in the generic verbs code. * * Return: always 0 */ -int rvt_destroy_cq(struct ib_cq *ibcq) +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 72184b1c176b..3ad6faf18ecb 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -53,9 +53,8 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int rvt_destroy_cq(struct ib_cq *ibcq); +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 6b712eecbd37..652f4a7efc1b 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -49,6 +49,7 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <asm/pgtable.h> +#include <rdma/uverbs_ioctl.h> #include "mmap.h" /** @@ -150,18 +151,19 @@ done: * rvt_create_mmap_info - allocate information for hfi1_mmap * @rdi: rvt dev struct * @size: size in bytes to map - * @context: user context + * @udata: user data (must be valid!) 
* @obj: opaque pointer to a cq, wq etc * * Return: rvt_mmap struct on success */ -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj) +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, + struct ib_udata *udata, void *obj) { struct rvt_mmap_info *ip; + if (!udata) + return ERR_PTR(-EINVAL); + ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node); if (!ip) return ip; @@ -177,7 +179,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, INIT_LIST_HEAD(&ip->pending_mmaps); ip->size = size; - ip->context = context; + ip->context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; ip->obj = obj; kref_init(&ip->ref); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index fab0e7b1daf9..02466c40bc1e 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -53,10 +53,8 @@ void rvt_mmap_init(struct rvt_dev_info *rdi); void rvt_release_mmap_info(struct kref *ref); int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, + struct ib_udata *udata, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0bb6e39dd03a..54f3f9c27552 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -392,7 +392,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *)umem; - n = umem->nmap; + n = ib_umem_num_pages(umem); mr = __rvt_alloc_mr(n, pd); if (IS_ERR(mr)) { @@ -548,7 +548,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) * * Returns 0 on success. */ -int rvt_dereg_mr(struct ib_mr *ibmr) +int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rvt_mr *mr = to_imr(ibmr); int ret; @@ -575,9 +575,8 @@ out: * * Return: the memory region on success, otherwise return an errno. 
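rvt_create_mmap_info() above (and rxe_create_mmap_info() further down) now takes udata instead of an explicit ucontext and rejects a NULL udata, recovering the context from the enclosing uverbs_attr_bundle exactly as the hunk shows. The lookup, pulled out into a hypothetical helper for clarity:

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

/* Only valid for user verbs: kernel callers pass udata == NULL, which is
 * why both mmap-info constructors now fail early when udata is missing.
 */
static struct ib_ucontext *demo_udata_to_ucontext(struct ib_udata *udata)
{
        return container_of(udata, struct uverbs_attr_bundle,
                            driver_udata)->context;
}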
*/ -struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct rvt_mr *mr; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 132800ee0205..2c8d0752e8e3 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -78,10 +78,9 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); -int rvt_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 6033054b22fa..a403718f0b5e 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -51,15 +51,13 @@ /** * rvt_alloc_pd - allocate a protection domain * @ibpd: PD - * @context: optional user context * @udata: optional user data * * Allocate and keep track of a PD. * * Return: 0 on success */ -int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct rvt_dev_info *dev = ib_to_rvt(ibdev); @@ -93,10 +91,11 @@ bail: /** * rvt_dealloc_pd - Free PD * @ibpd: Free up PD + * @udata: Valid user data or NULL for kernel object * * Return: always 0 */ -void rvt_dealloc_pd(struct ib_pd *ibpd) +void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 7a887e4a45e7..71ba76d72b1d 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -50,8 +50,7 @@ #include <rdma/rdma_vt.h> -int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); -void rvt_dealloc_pd(struct ib_pd *ibpd); +int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); +void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a34b9a2a32b6..31a2e65e4906 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -623,13 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) while (qp->s_last != qp->s_head) { struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - rvt_put_swqe(wqe); - - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah( - wqe->ud_wr.ah)->refcount); + rvt_put_qp_swqe(qp, wqe); if (++qp->s_last >= qp->s_size) qp->s_last = 0; smp_wmb(); /* see qp_set_savail */ @@ -957,8 +951,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, size_t sg_list_sz; struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); - struct rvt_ucontext *ucontext = rdma_udata_to_drv_context( - udata, 
struct rvt_ucontext, ibucontext); void *priv = NULL; size_t sqsize; @@ -1131,8 +1123,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - qp->ip = rvt_create_mmap_info(rdi, s, - &ucontext->ibucontext, + qp->ip = rvt_create_mmap_info(rdi, s, udata, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); @@ -1617,7 +1608,7 @@ inval: * * Return: 0 on success. */ -int rvt_destroy_qp(struct ib_qp *ibqp) +int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); @@ -2018,8 +2009,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, * opportunity to adjust PSN values based on internal checks. */ log_pmtu = qp->log_pmtu; - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->allowed_ops == IB_OPCODE_UD) { struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); log_pmtu = ah->log_pmtu; @@ -2067,8 +2057,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, return 0; bail_inval_free_ref: - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) + if (qp->allowed_ops == IB_OPCODE_UD) atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); bail_inval_free: /* release mr holds */ @@ -2691,11 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - rvt_put_swqe(wqe); - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 6d883972e0b8..6db1619389b0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -48,7 +48,7 @@ * */ -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> int rvt_driver_qp_init(struct rvt_dev_info *rdi); void rvt_qp_exit(struct rvt_dev_info *rdi); @@ -57,7 +57,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_udata *udata); int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int rvt_destroy_qp(struct ib_qp *ibqp); +int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 8d71647820a8..09f0cf538be6 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -45,7 +45,7 @@ * */ -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> #include <rdma/ib_hdrs.h> /* diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 895b3fabd0bf..8d6b3e764255 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -71,31 +71,24 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * - * Return: Allocated srq object + * Return: 0 on success */ -struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { - struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); - struct 
rvt_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct rvt_ucontext, ibucontext); - struct rvt_srq *srq; + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); u32 sz; - struct ib_srq *ret; + int ret; if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || srq_init_attr->attr.max_wr == 0 || srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) - return ERR_PTR(-EINVAL); - - srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; /* * Need to use vmalloc() if we want to support large #s of entries. @@ -109,7 +102,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz, dev->dparms.node); if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_srq; } @@ -118,23 +111,18 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - int err; u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; - srq->ip = - rvt_create_mmap_info(dev, s, &ucontext->ibucontext, - srq->rq.wq); + srq->ip = rvt_create_mmap_info(dev, s, udata, srq->rq.wq); if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_wq; } - err = ib_copy_to_udata(udata, &srq->ip->offset, + ret = ib_copy_to_udata(udata, &srq->ip->offset, sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); + if (ret) goto bail_ip; - } } /* @@ -146,7 +134,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, spin_lock(&dev->n_srqs_lock); if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_ip; } @@ -159,14 +147,13 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, spin_unlock_irq(&dev->pending_lock); } - return &srq->ibsrq; + return 0; bail_ip: kfree(srq->ip); bail_wq: vfree(srq->rq.wq); bail_srq: - kfree(srq); return ret; } @@ -338,9 +325,8 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy * - * Return always 0 */ -int rvt_destroy_srq(struct ib_srq *ibsrq) +void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); @@ -352,7 +338,4 @@ int rvt_destroy_srq(struct ib_srq *ibsrq) kref_put(&srq->ip->ref, rvt_release_mmap_info); else vfree(srq->rq.wq); - kfree(srq); - - return 0; } diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index bf0eaaf56465..6427d7d62a9a 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -50,13 +50,12 @@ #include <rdma/rdma_vt.h> void rvt_driver_srq_init(struct rvt_dev_info *rdi); -struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int rvt_destroy_srq(struct ib_srq *ibsrq); +void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); #endif /* DEF_RVTSRQ_H */ diff --git 
a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h index efc9d814b032..c32d21cc615e 100644 --- a/drivers/infiniband/sw/rdmavt/trace_qp.h +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -51,7 +51,7 @@ #include <linux/trace_seq.h> #include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_qp diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h index 995276933a55..c47357af2099 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rc.h +++ b/drivers/infiniband/sw/rdmavt/trace_rc.h @@ -51,7 +51,7 @@ #include <linux/trace_seq.h> #include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_rc diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index d5df352eadb1..d963ca755828 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -51,7 +51,7 @@ #include <linux/trace_seq.h> #include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_qp.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_tx diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 42c9d35f832d..9546a837a8ac 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -425,7 +425,10 @@ static const struct ib_device_ops rvt_dev_ops = { .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, .unmap_fmr = rvt_unmap_fmr, + + INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), }; diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index a57276f2cb84..ad3090131126 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -82,7 +82,7 @@ static void rxe_send_complete(unsigned long data) } int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, - int comp_vector, struct ib_ucontext *context, + int comp_vector, struct ib_udata *udata, struct rxe_create_cq_resp __user *uresp) { int err; @@ -94,7 +94,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return -ENOMEM; } - err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); if (err) { vfree(cq->queue->buf); @@ -115,13 +115,13 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, } int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, - struct rxe_resize_cq_resp __user *uresp) + struct rxe_resize_cq_resp __user *uresp, + struct ib_udata *udata) { int err; err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, - sizeof(struct rxe_cqe), - cq->queue->ip ? cq->queue->ip->context : NULL, + sizeof(struct rxe_cqe), udata, uresp ? 
&uresp->mi : NULL, NULL, &cq->cq_lock); if (!err) cq->ibcq.cqe = cqe; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 6cb18406f5b8..ce003666b800 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -643,7 +643,7 @@ struct rxe_atmeth { __be32 rkey; __be64 swap_add; __be64 comp; -} __attribute__((__packed__)); +} __packed; static inline u64 __atmeth_va(void *arg) { diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 3d8cef836f0d..775c23becaec 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -53,11 +53,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int comp_vector); int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, - int comp_vector, struct ib_ucontext *context, + int comp_vector, struct ib_udata *udata, struct rxe_create_cq_resp __user *uresp); int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, - struct rxe_resize_cq_resp __user *uresp); + struct rxe_resize_cq_resp __user *uresp, + struct ib_udata *udata); int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); @@ -91,10 +92,8 @@ struct rxe_mmap_info { void rxe_mmap_release(struct kref *ref); -struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, - u32 size, - struct ib_ucontext *context, - void *obj); +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, u32 size, + struct ib_udata *udata, void *obj); int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); @@ -224,13 +223,12 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_init_attr *init, - struct ib_ucontext *context, + struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct rxe_modify_srq_cmd *ucmd); + struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); void rxe_dealloc(struct ib_device *ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index d22431e3a908..48f48122ddcb 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -36,6 +36,7 @@ #include <linux/mm.h> #include <linux/errno.h> #include <asm/pgtable.h> +#include <rdma/uverbs_ioctl.h> #include "rxe.h" #include "rxe_loc.h" @@ -140,13 +141,14 @@ done: /* * Allocate information for rxe_mmap */ -struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, - u32 size, - struct ib_ucontext *context, - void *obj) +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, u32 size, + struct ib_udata *udata, void *obj) { struct rxe_mmap_info *ip; + if (!udata) + return ERR_PTR(-EINVAL); + ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) return NULL; @@ -165,7 +167,9 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, INIT_LIST_HEAD(&ip->pending_mmaps); ip->info.size = size; - ip->context = context; + ip->context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; ip->obj = obj; kref_init(&ip->ref); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 42f0f25e396c..f501f72489d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -179,7 
+179,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, } mem->umem = umem; - num_buf = umem->nmap; + num_buf = ib_umem_num_pages(umem); rxe_mem_init(access, mem); @@ -199,6 +199,12 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, buf = map[0]->buf; for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + if (num_buf >= RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); @@ -211,11 +217,6 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, num_buf++; buf++; - if (num_buf >= RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } } } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 753cabcd441c..5a3474f9351b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -338,13 +338,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, - struct rxe_av *av) +static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); + struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -364,11 +364,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, - struct rxe_av *av) +static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; + struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -392,16 +392,15 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) { int err = 0; - struct rxe_av *av = rxe_get_av(pkt); - if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(pkt, skb, av); - else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(pkt, skb, av); + if (skb->protocol == htons(ETH_P_IP)) + err = prepare4(pkt, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = prepare6(pkt, skb); *crc = rxe_icrc_hdr(pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -422,23 +421,20 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) { - struct rxe_av *av; int err; - av = rxe_get_av(pkt); - skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; rxe_add_ref(pkt->qp); atomic_inc(&pkt->qp->skb_out); - if (av->network_type == RDMA_NETWORK_IPV4) { + if (skb->protocol == htons(ETH_P_IP)) { err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); - } else if (av->network_type == RDMA_NETWORK_IPV6) { + } else if (skb->protocol == htons(ETH_P_IPV6)) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } else { - pr_err("Unknown layer 3 protocol: %d\n", av->network_type); + pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); rxe_drop_ref(pkt->qp); kfree_skb(skb); @@ -462,7 +458,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, 
struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt) { unsigned int hdr_len; - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *ndev; const struct ib_gid_attr *attr; const int port_num = 1; @@ -470,7 +466,6 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); if (IS_ERR(attr)) return NULL; - ndev = attr->ndev; if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -479,15 +474,26 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct ipv6hdr); + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(attr); + if (IS_ERR(ndev)) { + rcu_read_unlock(); + goto out; + } skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) + if (unlikely(!skb)) { + rcu_read_unlock(); goto out; + } - skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); + /* FIXME: hold reference to this netdev until life of this skb. */ skb->dev = ndev; + rcu_read_unlock(); + if (av->network_type == RDMA_NETWORK_IPV4) skb->protocol = htons(ETH_P_IP); else diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 120fa9005954..56cf18af016a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -52,12 +52,12 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC, + .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", .size = sizeof(struct rxe_srq), - .flags = RXE_POOL_INDEX, + .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, }, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 09ede70dc1e8..e2c6d1cedf41 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -217,8 +217,7 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, } static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, - struct ib_qp_init_attr *init, - struct ib_ucontext *context, + struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; @@ -254,7 +253,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->sq.queue) return -ENOMEM; - err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, context, + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, qp->sq.queue->buf, qp->sq.queue->buf_size, &qp->sq.queue->ip); @@ -287,7 +286,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, - struct ib_ucontext *context, + struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; @@ -308,7 +307,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->rq.queue) return -ENOMEM; - err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, context, + err = do_mmap_info(rxe, uresp ? 
&uresp->rq_mi : NULL, udata, qp->rq.queue->buf, qp->rq.queue->buf_size, &qp->rq.queue->ip); if (err) { @@ -344,8 +343,6 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - struct rxe_ucontext *ucontext = - rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc); rxe_add_ref(pd); rxe_add_ref(rcq); @@ -360,11 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, rxe_qp_init_misc(rxe, qp, init); - err = rxe_qp_init_req(rxe, qp, init, &ucontext->ibuc, uresp); + err = rxe_qp_init_req(rxe, qp, init, udata, uresp); if (err) goto err1; - err = rxe_qp_init_resp(rxe, qp, init, &ucontext->ibuc, uresp); + err = rxe_qp_init_resp(rxe, qp, init, udata, uresp); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index f84ab4469261..ff92704de32f 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -36,18 +36,15 @@ #include "rxe_loc.h" #include "rxe_queue.h" -int do_mmap_info(struct rxe_dev *rxe, - struct mminfo __user *outbuf, - struct ib_ucontext *context, - struct rxe_queue_buf *buf, - size_t buf_size, - struct rxe_mmap_info **ip_p) +int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, + struct ib_udata *udata, struct rxe_queue_buf *buf, + size_t buf_size, struct rxe_mmap_info **ip_p) { int err; struct rxe_mmap_info *ip = NULL; if (outbuf) { - ip = rxe_create_mmap_info(rxe, buf_size, context, buf); + ip = rxe_create_mmap_info(rxe, buf_size, udata, buf); if (!ip) goto err1; @@ -153,12 +150,9 @@ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, return 0; } -int rxe_queue_resize(struct rxe_queue *q, - unsigned int *num_elem_p, - unsigned int elem_size, - struct ib_ucontext *context, - struct mminfo __user *outbuf, - spinlock_t *producer_lock, +int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, + unsigned int elem_size, struct ib_udata *udata, + struct mminfo __user *outbuf, spinlock_t *producer_lock, spinlock_t *consumer_lock) { struct rxe_queue *new_q; @@ -170,7 +164,7 @@ int rxe_queue_resize(struct rxe_queue *q, if (!new_q) return -ENOMEM; - err = do_mmap_info(new_q->rxe, outbuf, context, new_q->buf, + err = do_mmap_info(new_q->rxe, outbuf, udata, new_q->buf, new_q->buf_size, &new_q->ip); if (err) { vfree(new_q->buf); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 79ba4b320054..acd0a925481c 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -76,12 +76,9 @@ struct rxe_queue { unsigned int index_mask; }; -int do_mmap_info(struct rxe_dev *rxe, - struct mminfo __user *outbuf, - struct ib_ucontext *context, - struct rxe_queue_buf *buf, - size_t buf_size, - struct rxe_mmap_info **ip_p); +int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, + struct ib_udata *udata, struct rxe_queue_buf *buf, + size_t buf_size, struct rxe_mmap_info **ip_p); void rxe_queue_reset(struct rxe_queue *q); @@ -89,10 +86,8 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size); -int rxe_queue_resize(struct rxe_queue *q, - unsigned int *num_elem_p, - unsigned int elem_size, - struct ib_ucontext *context, +int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, + unsigned int elem_size, struct ib_udata 
*udata, struct mminfo __user *outbuf, /* Protect producers while resizing queue */ spinlock_t *producer_lock, diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index c41a5fee81f7..d8459431534e 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -99,8 +99,7 @@ err1: } int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_init_attr *init, - struct ib_ucontext *context, + struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp) { int err; @@ -128,7 +127,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->rq.queue = q; - err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf, + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, q->buf, q->buf_size, &q->ip); if (err) { vfree(q->buf); @@ -149,7 +148,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct rxe_modify_srq_cmd *ucmd) + struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) { int err; struct rxe_queue *q = srq->rq.queue; @@ -163,11 +162,8 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, mi = u64_to_user_ptr(ucmd->mmap_info_addr); err = rxe_queue_resize(q, &attr->max_wr, - rcv_wqe_size(srq->rq.max_sge), - srq->rq.queue->ip ? - srq->rq.queue->ip->context : - NULL, - mi, &srq->rq.producer_lock, + rcv_wqe_size(srq->rq.max_sge), udata, mi, + &srq->rq.producer_lock, &srq->rq.consumer_lock); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 6ecf28570ff0..8c3e2a18cfe4 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -176,8 +176,7 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num, return 0; } -static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); @@ -185,37 +184,31 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } -static void rxe_dealloc_pd(struct ib_pd *ibpd) +static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); rxe_drop_ref(pd); } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *attr, - u32 flags, - struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, + u32 flags, struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(ibpd->device); - struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_ah *ah; + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); err = rxe_av_chk_attr(rxe, attr); if (err) - return ERR_PTR(err); - - ah = rxe_alloc(&rxe->ah_pool); - if (!ah) - return ERR_PTR(-ENOMEM); + return err; - rxe_add_ref(pd); - ah->pd = pd; + err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + if (err) + return err; rxe_init_av(attr, &ah->av); - return &ah->ibah; + return 0; } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) @@ -242,13 +235,11 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) +static void rxe_destroy_ah(struct ib_ah 
*ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); - rxe_drop_ref(ah->pd); rxe_drop_ref(ah); - return 0; } static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) @@ -298,21 +289,18 @@ err1: return err; } -static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init, - struct ib_udata *udata) +static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, + struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(ibpd->device); - struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_ucontext *ucontext = - rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc); - struct rxe_srq *srq; + struct rxe_dev *rxe = to_rdev(ibsrq->device); + struct rxe_pd *pd = to_rpd(ibsrq->pd); + struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) - return ERR_PTR(-EINVAL); + return -EINVAL; uresp = udata->outbuf; } @@ -320,28 +308,24 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, if (err) goto err1; - srq = rxe_alloc(&rxe->srq_pool); - if (!srq) { - err = -ENOMEM; + err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + if (err) goto err1; - } - rxe_add_index(srq); rxe_add_ref(pd); srq->pd = pd; - err = rxe_srq_from_init(rxe, srq, init, &ucontext->ibuc, uresp); + err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) goto err2; - return &srq->ibsrq; + return 0; err2: rxe_drop_ref(pd); - rxe_drop_index(srq); rxe_drop_ref(srq); err1: - return ERR_PTR(err); + return err; } static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -366,7 +350,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (err) goto err1; - err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd); + err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) goto err1; @@ -389,7 +373,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } -static int rxe_destroy_srq(struct ib_srq *ibsrq) +static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); @@ -397,10 +381,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq) rxe_queue_cleanup(srq->rq.queue); rxe_drop_ref(srq->pd); - rxe_drop_index(srq); rxe_drop_ref(srq); - - return 0; } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, @@ -509,7 +490,7 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } -static int rxe_destroy_qp(struct ib_qp *ibqp) +static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); @@ -799,7 +780,6 @@ err1: static struct ib_cq *rxe_create_cq(struct ib_device *dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int err; @@ -826,8 +806,8 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev, goto err1; } - err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, - context, uresp); + err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata, + uresp); if (err) goto err2; @@ -839,7 +819,7 @@ err1: return ERR_PTR(err); } -static int rxe_destroy_cq(struct ib_cq *ibcq) +static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); @@ -866,7 +846,7 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (err) goto err1; - err = rxe_cq_resize_queue(cq, cqe, uresp); + err = rxe_cq_resize_queue(cq, cqe, uresp, udata); if (err) 
goto err1; @@ -990,7 +970,7 @@ err2: return ERR_PTR(err); } -static int rxe_dereg_mr(struct ib_mr *ibmr) +static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mem *mr = to_rmr(ibmr); @@ -1001,9 +981,8 @@ static int rxe_dereg_mr(struct ib_mr *ibmr) return 0; } -static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); @@ -1176,7 +1155,10 @@ static const struct ib_device_ops rxe_dev_ops = { .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), }; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 157e51aeb1e1..e8be7f44e3be 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -71,8 +71,8 @@ struct rxe_pd { }; struct rxe_ah { - struct rxe_pool_entry pelem; struct ib_ah ibah; + struct rxe_pool_entry pelem; struct rxe_pd *pd; struct rxe_av av; }; @@ -120,8 +120,8 @@ struct rxe_rq { }; struct rxe_srq { - struct rxe_pool_entry pelem; struct ib_srq ibsrq; + struct rxe_pool_entry pelem; struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 48eda16db1a7..9b5e11d3fb85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2402,7 +2402,18 @@ static ssize_t dev_id_show(struct device *dev, { struct net_device *ndev = to_net_dev(dev); - if (ndev->dev_id == ndev->dev_port) + /* + * ndev->dev_port will be equal to 0 in old kernel prior to commit + * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface + * port numbers") Zero was chosen as special case for user space + * applications to fallback and query dev_id to check if it has + * different value or not. + * + * Don't print warning in such scenario. + * + * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358 + */ + if (ndev->dev_port && ndev->dev_id == ndev->dev_port) netdev_info_once(ndev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? 
See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 1e88213459f2..ba09068f6200 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -279,8 +279,7 @@ void ipoib_event(struct ib_event_handler *handler, ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, dev_name(&record->device->dev), record->element.port_num); - if (record->event == IB_EVENT_SM_CHANGE || - record->event == IB_EVENT_CLIENT_REREGISTER) { + if (record->event == IB_EVENT_CLIENT_REREGISTER) { queue_work(ipoib_workqueue, &priv->flush_light); } else if (record->event == IB_EVENT_PORT_ERR || record->event == IB_EVENT_PORT_ACTIVE || diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index d00af71a2cfc..299268f261ee 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -4,8 +4,8 @@ config INFINIBAND_ISER select SCSI_ISCSI_ATTRS ---help--- Support for the iSCSI Extensions for RDMA (iSER) Protocol - over InfiniBand. This allows you to access storage devices - that speak iSCSI over iSER over InfiniBand. + over InfiniBand. This allows you to access storage devices + that speak iSCSI over iSER over InfiniBand. The iSER protocol is defined by IETF. See <http://www.ietf.org/rfc/rfc5046.txt> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8c707accd148..9c185a8dabd3 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -763,7 +763,6 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param, char *buf) { struct iser_conn *iser_conn = ep->dd_data; - int len; switch (param) { case ISCSI_PARAM_CONN_PORT: @@ -774,12 +773,10 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, return iscsi_conn_get_addr_param((struct sockaddr_storage *) &iser_conn->ib_conn.cma_id->route.addr.dst_addr, param, buf); - break; default: - return -ENOSYS; + break; } - - return len; + return -ENOSYS; } /** diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a7aeaa0c6fbc..36d525110fd2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -311,7 +311,7 @@ struct iser_login_desc { u64 rsp_dma; struct ib_sge sge; struct ib_cqe cqe; -} __attribute__((packed)); +} __packed; struct iser_conn; struct ib_conn; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 560e4f2d466e..be5befd92d16 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -51,6 +51,7 @@ */ #include <linux/module.h> +#include <linux/xarray.h> #include <rdma/ib_addr.h> #include <rdma/ib_verbs.h> #include <rdma/opa_smi.h> @@ -97,7 +98,7 @@ const char opa_vnic_driver_version[] = DRV_VERSION; * @class_port_info: Class port info information. 
* @tid: Transaction id * @port_num: OPA port number - * @vport_idr: vnic ports idr + * @vports: vnic ports * @event_handler: ib event handler * @lock: adapter interface lock */ @@ -107,7 +108,7 @@ struct opa_vnic_vema_port { struct opa_class_port_info class_port_info; u64 tid; u8 port_num; - struct idr vport_idr; + struct xarray vports; struct ib_event_handler event_handler; /* Lock to query/update network adapter */ @@ -148,7 +149,7 @@ vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad, { u8 vport_num = vema_get_vport_num(recvd_mad); - return idr_find(&port->vport_idr, vport_num); + return xa_load(&port->vports, vport_num); } /** @@ -207,8 +208,7 @@ static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port, int rc; adapter->cport = cport; - rc = idr_alloc(&port->vport_idr, adapter, vport_num, - vport_num + 1, GFP_NOWAIT); + rc = xa_insert(&port->vports, vport_num, adapter, GFP_KERNEL); if (rc < 0) { opa_vnic_rem_netdev(adapter); adapter = ERR_PTR(rc); @@ -853,36 +853,14 @@ err_exit: v_err("Aborting trap\n"); } -static int vema_rem_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - opa_vnic_rem_netdev(adapter); - return 0; -} - -static int vema_enable_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - netif_carrier_on(adapter->netdev); - return 0; -} - -static int vema_disable_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - netif_carrier_off(adapter->netdev); - return 0; -} - static void opa_vnic_event(struct ib_event_handler *handler, struct ib_event *record) { struct opa_vnic_vema_port *port = container_of(handler, struct opa_vnic_vema_port, event_handler); struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_vnic_adapter *adapter; + unsigned long index; if (record->element.port_num != port->port_num) return; @@ -891,10 +869,16 @@ static void opa_vnic_event(struct ib_event_handler *handler, record->event, dev_name(&record->device->dev), record->element.port_num); - if (record->event == IB_EVENT_PORT_ERR) - idr_for_each(&port->vport_idr, vema_disable_vport, NULL); - if (record->event == IB_EVENT_PORT_ACTIVE) - idr_for_each(&port->vport_idr, vema_enable_vport, NULL); + if (record->event != IB_EVENT_PORT_ERR && + record->event != IB_EVENT_PORT_ACTIVE) + return; + + xa_for_each(&port->vports, index, adapter) { + if (record->event == IB_EVENT_PORT_ACTIVE) + netif_carrier_on(adapter->netdev); + else + netif_carrier_off(adapter->netdev); + } } /** @@ -905,6 +889,8 @@ static void opa_vnic_event(struct ib_event_handler *handler, */ static void vema_unregister(struct opa_vnic_ctrl_port *cport) { + struct opa_vnic_adapter *adapter; + unsigned long index; int i; for (i = 1; i <= cport->num_ports; i++) { @@ -915,13 +901,14 @@ static void vema_unregister(struct opa_vnic_ctrl_port *cport) /* Lock ensures no MAD is being processed */ mutex_lock(&port->lock); - idr_for_each(&port->vport_idr, vema_rem_vport, NULL); + xa_for_each(&port->vports, index, adapter) + opa_vnic_rem_netdev(adapter); mutex_unlock(&port->lock); ib_unregister_mad_agent(port->mad_agent); port->mad_agent = NULL; mutex_destroy(&port->lock); - idr_destroy(&port->vport_idr); + xa_destroy(&port->vports); ib_unregister_event_handler(&port->event_handler); } } @@ -958,7 +945,7 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) cport->ibdev, opa_vnic_event); ib_register_event_handler(&port->event_handler); - idr_init(&port->vport_idr); + xa_init(&port->vports); mutex_init(&port->lock); port->mad_agent 
= ib_register_mad_agent(cport->ibdev, i, IB_QPT_GSI, ®_req, @@ -969,7 +956,6 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) ret = PTR_ERR(port->mad_agent); port->mad_agent = NULL; mutex_destroy(&port->lock); - idr_destroy(&port->vport_idr); vema_unregister(cport); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ca0ee9916e9e..0059b290e095 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -535,23 +535,16 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) do_div(ns, NSEC_PER_SEC / HZ); clock->overflow_period = ns; - mdev->clock_info_page = alloc_page(GFP_KERNEL); - if (mdev->clock_info_page) { - mdev->clock_info = kmap(mdev->clock_info_page); - if (!mdev->clock_info) { - __free_page(mdev->clock_info_page); - mlx5_core_warn(mdev, "failed to map clock page\n"); - } else { - mdev->clock_info->sign = 0; - mdev->clock_info->nsec = clock->tc.nsec; - mdev->clock_info->cycles = clock->tc.cycle_last; - mdev->clock_info->mask = clock->cycles.mask; - mdev->clock_info->mult = clock->nominal_c_mult; - mdev->clock_info->shift = clock->cycles.shift; - mdev->clock_info->frac = clock->tc.frac; - mdev->clock_info->overflow_period = - clock->overflow_period; - } + mdev->clock_info = + (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (mdev->clock_info) { + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = clock->overflow_period; } INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); @@ -599,8 +592,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) cancel_delayed_work_sync(&clock->overflow_work); if (mdev->clock_info) { - kunmap(mdev->clock_info_page); - __free_page(mdev->clock_info_page); + free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; } diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index c2be029b9b53..6c809440f319 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -71,6 +71,13 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, const struct net_device *dev, const char *fmt, ...); +struct ib_device; + +extern __printf(3, 4) +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, + const char *fmt, ...); + #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ __attribute__((section("__verbose"))) name = { \ @@ -154,6 +161,10 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, _dynamic_func_call(fmt, __dynamic_netdev_dbg, \ dev, fmt, ##__VA_ARGS__) +#define dynamic_ibdev_dbg(dev, fmt, ...) \ + _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \ + dev, fmt, ##__VA_ARGS__) + #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? 
prefix_str : "hexdump", \ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a39b323c52e..5a27246db883 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -689,7 +689,6 @@ struct mlx5_core_dev { #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; - struct page *clock_info_page; struct mlx5_fw_tracer *tracer; }; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 40b48e2133cb..15eb85de9226 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -36,6 +36,12 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) +/* + * Avoids triggering -Wtype-limits compilation warning, + * while using unsigned data types to check a < 0. + */ +#define is_non_negative(a) ((a) > 0 || (a) == 0) +#define is_negative(a) (!(is_non_negative(a))) #ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* @@ -227,10 +233,10 @@ typeof(d) _d = d; \ u64 _a_full = _a; \ unsigned int _to_shift = \ - _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \ + is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ - (_to_shift != _s || *_d < 0 || _a < 0 || \ - (*_d >> _to_shift) != _a); \ + (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ + (*_d >> _to_shift) != _a); \ }) /** diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b4be960c7e5d..30a9a55c28ba 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -340,11 +340,11 @@ int sg_alloc_table_chained(struct sg_table *table, int nents, * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, you - * can call sg_page_iter_page(@piter) to get the current page and its dma - * address. @piter->sg will point to the sg holding this page and - * @piter->sg_pgoffset to the page's page offset within the sg. The iteration - * will stop either when a maximum number of sg entries was reached or a - * terminating sg (sg_last(sg) == true) was reached. + * can call sg_page_iter_page(@piter) to get the current page. + * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to + * the page's page offset within the sg. The iteration will stop either when a + * maximum number of sg entries was reached or a terminating sg + * (sg_last(sg) == true) was reached. */ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 62e990b620aa..870b5e6c06db 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -54,6 +54,10 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( void *), void *context); +int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, + u16 *vlan_id, u8 *smac); +struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); + /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. 
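The two helpers exported from ib_cache.h just above exist because gid_attr->ndev becomes an __rcu pointer later in this series (see the ib_verbs.h hunk below), so callers can no longer dereference it directly. The rxe_init_packet() hunk earlier in this diff shows the intended calling convention; the following stand-alone sketch (the function name is illustrative only) restates it:

#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

static int example_gid_ndev_mtu(const struct ib_gid_attr *attr)
{
	struct net_device *ndev;
	int mtu;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(attr);
	if (IS_ERR(ndev)) {
		rcu_read_unlock();
		return PTR_ERR(ndev);
	}
	/* The pointer is valid only inside this RCU read-side section;
	 * take dev_hold() before unlocking if it must outlive it.
	 */
	mtu = READ_ONCE(ndev->mtu);
	rcu_read_unlock();

	return mtu;
}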
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 79ba8219e7dc..eea946fcc819 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -198,7 +198,7 @@ struct ib_sa_hdr { __be16 attr_offset; __be16 reserved; ib_sa_comp_mask comp_mask; -} __attribute__ ((packed)); +} __packed; struct ib_mad { struct ib_mad_hdr mad_hdr; @@ -227,7 +227,7 @@ struct ib_sa_mad { struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; u8 data[IB_MGMT_SA_DATA]; -} __attribute__ ((packed)); +} __packed; struct ib_vendor_mad { struct ib_mad_hdr mad_hdr; diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index b439e988408e..7be0028f155c 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -61,7 +61,7 @@ struct ib_smp { u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; -} __attribute__ ((packed)); +} __packed; #define IB_SMP_DIRECTION cpu_to_be16(0x8000) diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 73af05db04c7..040d853077c6 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,12 +48,11 @@ struct ib_umem { unsigned long address; int page_shift; u32 writable : 1; - u32 hugetlb : 1; u32 is_odp : 1; struct work_struct work; struct sg_table sg_head; int nmap; - int npages; + unsigned int sg_nents; }; /* Returns the offset of the umem start relative to the first page. */ @@ -87,6 +86,9 @@ void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); +unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -104,6 +106,12 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs size_t length) { return -EINVAL; } +static inline int ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) { + return -EINVAL; +} + #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index dadc96dea39c..eeec4e53c448 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -69,6 +69,7 @@ struct ib_umem_odp { int notifiers_seq; int notifiers_count; + int npages; /* Tree tracking */ struct umem_odp_node interval_tree; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b9e17bcc201..0742095355f2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -59,6 +59,8 @@ #include <linux/mmu_notifier.h> #include <linux/uaccess.h> #include <linux/cgroup_rdma.h> +#include <linux/irqflags.h> +#include <linux/preempt.h> #include <uapi/rdma/ib_user_verbs.h> #include <rdma/restrack.h> #include <uapi/rdma/rdma_user_ioctl.h> @@ -72,6 +74,36 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +__printf(3, 4) __cold +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...); +__printf(2, 3) __cold +void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_alert(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_crit(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_err(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_warn(const struct 
ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_notice(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_info(const struct ib_device *ibdev, const char *format, ...); + +#if defined(CONFIG_DYNAMIC_DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + dynamic_ibdev_dbg(__dev, format, ##args) +#elif defined(DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + ibdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} +#endif + union ib_gid { u8 raw[16]; struct { @@ -92,7 +124,7 @@ enum ib_gid_type { #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { - struct net_device *ndev; + struct net_device __rcu *ndev; struct ib_device *device; union ib_gid gid; enum ib_gid_type gid_type; @@ -108,6 +140,7 @@ enum rdma_node_type { RDMA_NODE_RNIC, RDMA_NODE_USNIC, RDMA_NODE_USNIC_UDP, + RDMA_NODE_UNSPECIFIED, }; enum { @@ -119,7 +152,8 @@ enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, RDMA_TRANSPORT_USNIC, - RDMA_TRANSPORT_USNIC_UDP + RDMA_TRANSPORT_USNIC_UDP, + RDMA_TRANSPORT_UNSPECIFIED, }; enum rdma_protocol_type { @@ -2189,8 +2223,6 @@ struct ib_cache { struct ib_event_handler event_handler; }; -struct iw_cm_verbs; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2272,6 +2304,8 @@ struct ib_counters_read_attr { }; struct uverbs_attr_bundle; +struct iw_cm_id; +struct iw_cm_conn_param; #define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ .size_##ib_struct = \ @@ -2281,8 +2315,11 @@ struct uverbs_attr_bundle; !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) + #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ - ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL)) + rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct @@ -2394,23 +2431,21 @@ struct ib_device_ops { void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); - void (*dealloc_pd)(struct ib_pd *pd); - struct ib_ah *(*create_ah)(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata); + int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah, u32 flags); - struct ib_srq *(*create_srq)(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); + void (*destroy_ah)(struct ib_ah *ah, u32 flags); + int (*create_srq)(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq); + void (*destroy_srq)(struct ib_srq *srq, struct ib_udata 
*udata); struct ib_qp *(*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2418,13 +2453,12 @@ struct ib_device_ops { int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); + int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); struct ib_cq *(*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq); + int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, @@ -2433,9 +2467,9 @@ struct ib_device_ops { int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*dereg_mr)(struct ib_mr *mr); + int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, @@ -2456,9 +2490,8 @@ struct ib_device_ops { int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, - struct ib_ucontext *ucontext, struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, struct ib_udata *udata); @@ -2483,7 +2516,7 @@ struct ib_device_ops { struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq); + int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *(*create_rwq_ind_table)( @@ -2495,7 +2528,7 @@ struct ib_device_ops { struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); - int (*dealloc_dm)(struct ib_dm *dm); + int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -2550,12 +2583,37 @@ struct ib_device_ops { */ void (*dealloc_driver)(struct ib_device *dev); + /* iWarp CM callbacks */ + void (*iw_add_ref)(struct ib_qp *qp); + void (*iw_rem_ref)(struct ib_qp *qp); + struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn); + int (*iw_connect)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_accept)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); + int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); + int (*iw_destroy_listen)(struct iw_cm_id *cm_id); + + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_srq); 
DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; -struct rdma_restrack_root; +struct ib_core_device { + /* device must be the first element in structure until, + * union of ib_core_device and device exists in ib_device. + */ + struct device dev; + possible_net_t rdma_net; + struct kobject *ports_kobj; + struct list_head port_list; + struct ib_device *owner; /* reach back to owner ib_device */ +}; +struct rdma_restrack_root; struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2578,19 +2636,18 @@ struct ib_device { int num_comp_vectors; - struct iw_cm_verbs *iwcm; - struct module *owner; - struct device dev; + union { + struct device dev; + struct ib_core_device coredev; + }; + /* First group for device attributes, * Second group for driver provided attributes (optional). * It is NULL terminated array. */ const struct attribute_group *groups[3]; - struct kobject *ports_kobj; - struct list_head port_list; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -2626,6 +2683,15 @@ struct ib_device { struct work_struct unregistration_work; const struct rdma_link_ops *link_ops; + + /* Protects compat_devs xarray modifications */ + struct mutex compat_devs_mutex; + /* Maintains compat devices for each net namespace */ + struct xarray compat_devs; + + /* Used by iWarp CM */ + char iw_ifname[IFNAMSIZ]; + u32 iw_driver_flags; }; struct ib_client { @@ -2662,6 +2728,21 @@ struct ib_client { u8 no_kverbs_req:1; }; +/* + * IB block DMA iterator + * + * Iterates the DMA-mapped SGL in contiguous memory blocks aligned + * to a HW supported page size. + */ +struct ib_block_iter { + /* internal states */ + struct scatterlist *__sg; /* sg holding the current aligned block */ + dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + unsigned int __sg_nents; /* number of SG entries */ + unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ + unsigned int __pg_bit; /* alignment of current block */ +}; + struct ib_device *_ib_alloc_device(size_t size); #define ib_alloc_device(drv_struct, member) \ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ @@ -2682,6 +2763,38 @@ void ib_unregister_device_queued(struct ib_device *ib_dev); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, + unsigned int nents, + unsigned long pgsz); +bool __rdma_block_iter_next(struct ib_block_iter *biter); + +/** + * rdma_block_iter_dma_address - get the aligned dma address of the current + * block held by the block iterator. + * @biter: block iterator holding the memory block + */ +static inline dma_addr_t +rdma_block_iter_dma_address(struct ib_block_iter *biter) +{ + return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1); +} + +/** + * rdma_for_each_block - iterate over contiguous memory blocks of the sg list + * @sglist: sglist to iterate over + * @biter: block iterator holding the memory block + * @nents: maximum number of sg entries to iterate over + * @pgsz: best HW supported page size to use + * + * Callers may use rdma_block_iter_dma_address() to get each + * blocks aligned DMA address. 
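For drivers, the block iterator described in the kernel-doc just above (its macro definition follows immediately) replaces open-coded page-by-page walks of the umem SGL: given the device's supported HW page sizes, it yields one aligned block per iteration. A hedged sketch of how a driver might size its page table with it; foo_count_hw_pages() and the supported-size bitmap are illustrative, not taken from the patch, while ib_umem_find_best_pgsz() is the companion helper added to ib_umem.h earlier in this diff:

#include <linux/sizes.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Hypothetical: the device can map 4K and 2M pages. */
#define FOO_SUPPORTED_PGSZ (SZ_4K | SZ_2M)

static unsigned int foo_count_hw_pages(struct ib_umem *umem, u64 virt,
				       unsigned long *pg_sz)
{
	struct ib_block_iter biter;
	unsigned int nblocks = 0;

	*pg_sz = ib_umem_find_best_pgsz(umem, FOO_SUPPORTED_PGSZ, virt);
	if (!*pg_sz)
		return 0;

	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, *pg_sz) {
		/* rdma_block_iter_dma_address(&biter) is the aligned DMA
		 * address to program into the HW page table for this block.
		 */
		nblocks++;
	}

	return nblocks;
}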
+ */ +#define rdma_for_each_block(sglist, biter, nents, pgsz) \ + for (__rdma_block_iter_start(biter, sglist, nents, \ + pgsz); \ + __rdma_block_iter_next(biter);) + /** * ib_get_client_data - Get IB client context * @device:Device to get context for @@ -2705,9 +2818,6 @@ void ib_set_device_ops(struct ib_device *device, #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot); -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size); #else static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, @@ -2716,12 +2826,6 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, { return -EINVAL; } -static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - return -EINVAL; -} #endif static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) @@ -2978,8 +3082,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) { - return (device->port_data[port_num].immutable.core_cap_flags & - RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_OPA_MAD; } /** @@ -3195,6 +3299,30 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } +/** + * rdma_find_pg_bit - Find page bit given address and HW supported page sizes + * + * @addr: address + * @pgsz_bitmap: bitmap of HW supported page sizes + */ +static inline unsigned int rdma_find_pg_bit(unsigned long addr, + unsigned long pgsz_bitmap) +{ + unsigned long align; + unsigned long pgsz; + + align = addr & -addr; + + /* Find page bit such that addr is aligned to the highest supported + * HW page size + */ + pgsz = pgsz_bitmap & ~(-align << 1); + if (!pgsz) + return __ffs(pgsz_bitmap); + + return __fls(pgsz); +} + int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, @@ -3236,9 +3364,27 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); + #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -void ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_dealloc_pd_user - Deallocate kernel/user PD + * @pd: The protection domain + * @udata: Valid user data or NULL for kernel objects + */ +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); + +/** + * ib_dealloc_pd - Deallocate kernel PD + * @pd: The protection domain + * + * NOTE: for user PD use ib_dealloc_pd_user with valid udata! + */ +static inline void ib_dealloc_pd(struct ib_pd *pd) +{ + ib_dealloc_pd_user(pd, NULL); +} enum rdma_create_ah_flags { /* In a sleepable context */ @@ -3351,11 +3497,24 @@ enum rdma_destroy_ah_flags { }; /** - * rdma_destroy_ah - Destroys an address handle. + * rdma_destroy_ah_user - Destroys an address handle. * @ah: The address handle to destroy. * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). 
+ * @udata: Valid user data or NULL for kernel objects */ -int rdma_destroy_ah(struct ib_ah *ah, u32 flags); +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata); + +/** + * rdma_destroy_ah - Destroys an kernel address handle. + * @ah: The address handle to destroy. + * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). + * + * NOTE: for user ah use rdma_destroy_ah_user with valid udata! + */ +static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return rdma_destroy_ah_user(ah, flags, NULL); +} /** * ib_create_srq - Creates a SRQ associated with the specified protection @@ -3399,10 +3558,22 @@ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** - * ib_destroy_srq - Destroys the specified SRQ. + * ib_destroy_srq_user - Destroys the specified SRQ. * @srq: The SRQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_srq(struct ib_srq *srq); +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata); + +/** + * ib_destroy_srq - Destroys the specified kernel SRQ. + * @srq: The SRQ to destroy. + * + * NOTE: for user srq use ib_destroy_srq_user with valid udata! + */ +static inline int ib_destroy_srq(struct ib_srq *srq) +{ + return ib_destroy_srq_user(srq, NULL); +} /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. @@ -3422,15 +3593,34 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, } /** - * ib_create_qp - Creates a QP associated with the specified protection + * ib_create_qp_user - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_qp_user(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + +/** + * ib_create_qp - Creates a kernel QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects + * + * NOTE: for user qp use ib_create_qp_user with valid udata! + */ +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + return ib_create_qp_user(pd, qp_init_attr, NULL); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3480,8 +3670,20 @@ int ib_query_qp(struct ib_qp *qp, /** * ib_destroy_qp - Destroys the specified QP. * @qp: The QP to destroy. + * @udata: Valid udata or NULL for kernel objects */ -int ib_destroy_qp(struct ib_qp *qp); +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata); + +/** + * ib_destroy_qp - Destroys the specified kernel QP. + * @qp: The QP to destroy. + * + * NOTE: for user qp use ib_destroy_qp_user with valid udata! + */ +static inline int ib_destroy_qp(struct ib_qp *qp) +{ + return ib_destroy_qp_user(qp, NULL); +} /** * ib_open_qp - Obtain a reference to an existing sharable QP. 
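The pattern repeated through the rest of this header is the split of each destroy/dealloc verb into a _user variant that carries udata and a kernel inline wrapper that passes NULL, so kernel ULPs keep their old calling convention while uverbs hands the driver the caller's context. On the driver side the destroy callbacks now receive that udata directly; a hedged sketch of how a driver might use it (the foo_* names and the ucontext type are illustrative, rdma_udata_to_drv_context() is the real helper seen in the rdmavt and rxe hunks above):

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct foo_ucontext {
	struct ib_ucontext ibucontext;
	/* per-process mmap bookkeeping ... */
};

static int foo_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	/* NULL udata means a kernel QP (ib_destroy_qp());
	 * non-NULL means the call came through ib_destroy_qp_user().
	 */
	struct foo_ucontext *uctx =
		rdma_udata_to_drv_context(udata, struct foo_ucontext,
					  ibucontext);

	if (uctx) {
		/* drop mmap entries registered against this ucontext */
	}
	/* common HW teardown ... */
	return 0;
}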
@@ -3541,13 +3743,66 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } -struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, const char *caller); -#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \ - __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME) +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata); + +/** + * ib_alloc_cq_user: Allocate kernel/user CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * @udata: Valid user data or NULL for kernel objects + */ +static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev, + void *private, int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + struct ib_udata *udata) +{ + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + KBUILD_MODNAME, udata); +} + +/** + * ib_alloc_cq: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * + * NOTE: for user cq use ib_alloc_cq_user with valid udata! + */ +static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx) +{ + return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + NULL); +} + +/** + * ib_free_cq_user - Free kernel/user CQ + * @cq: The CQ to free + * @udata: Valid user data or NULL for kernel objects + */ +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_free_cq - Free kernel CQ + * @cq: The CQ to free + * + * NOTE: for user cq use ib_free_cq_user with valid udata! + */ +static inline void ib_free_cq(struct ib_cq *cq) +{ + ib_free_cq_user(cq, NULL); +} -void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); /** @@ -3591,10 +3846,22 @@ int ib_resize_cq(struct ib_cq *cq, int cqe); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** - * ib_destroy_cq - Destroys the specified CQ. + * ib_destroy_cq_user - Destroys the specified CQ. * @cq: The CQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_cq(struct ib_cq *cq); +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_destroy_cq - Destroys the specified kernel CQ. + * @cq: The CQ to destroy. + * + * NOTE: for user cq use ib_destroy_cq_user with valid udata! + */ +static inline int ib_destroy_cq(struct ib_cq *cq) +{ + return ib_destroy_cq_user(cq, NULL); +} /** * ib_poll_cq - poll a CQ for completion(s) @@ -3848,17 +4115,37 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, } /** - * ib_dereg_mr - Deregisters a memory region and removes it from the + * ib_dereg_mr_user - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + * @udata: Valid user data or NULL for kernel object + * + * This function can fail, if the memory region has memory windows bound to it. 
+ */ +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata); + +/** + * ib_dereg_mr - Deregisters a kernel memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. * * This function can fail, if the memory region has memory windows bound to it. + * + * NOTE: for user mr use ib_dereg_mr_user with valid udata! */ -int ib_dereg_mr(struct ib_mr *mr); +static inline int ib_dereg_mr(struct ib_mr *mr) +{ + return ib_dereg_mr_user(mr, NULL); +} + +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); +} /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR @@ -3956,8 +4243,9 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object */ -int ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(int flags) { @@ -4033,7 +4321,7 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); -int ib_destroy_wq(struct ib_wq *wq); +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, @@ -4349,7 +4637,10 @@ rdma_set_device_sysfs_group(struct ib_device *dev, */ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) { - return container_of(device, struct ib_device, dev); + struct ib_core_device *coredev = + container_of(device, struct ib_core_device, dev); + + return coredev->owner; } /** @@ -4362,4 +4653,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) */ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) + +bool rdma_dev_access_netns(const struct ib_device *device, + const struct net *net); #endif /* IB_VERBS_H */ diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 0e1f02815643..5aa8a9c76aa0 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -118,31 +118,6 @@ enum iw_flags { IW_F_NO_PORT_MAP = (1 << 0), }; -struct iw_cm_verbs { - void (*add_ref)(struct ib_qp *qp); - - void (*rem_ref)(struct ib_qp *qp); - - struct ib_qp * (*get_qp)(struct ib_device *device, - int qpn); - - int (*connect)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*accept)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*reject)(struct iw_cm_id *cm_id, - const void *pdata, u8 pdata_len); - - int (*create_listen)(struct iw_cm_id *cm_id, - int backlog); - - int (*destroy_listen)(struct iw_cm_id *cm_id); - char ifname[IFNAMSIZ]; - enum iw_flags driver_flags; -}; - /** * iw_create_cm_id - Create an IW CM identifier. 
* diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index b4f0ac02f283..7147a9263011 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -413,6 +413,6 @@ struct opa_port_info { u8 local_port_num; u8 reserved12; u8 reserved13; /* was guid_cap */ -} __attribute__ ((packed)); +} __packed; #endif /* OPA_PORT_INFO_H */ diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index f7896117936e..c7b2ef12792d 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -98,7 +98,7 @@ struct opa_smp { struct opa_node_description { u8 data[64]; -} __attribute__ ((packed)); +} __packed; struct opa_node_info { u8 base_version; @@ -114,7 +114,7 @@ struct opa_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; /* network byte order */ -} __attribute__ ((packed)); +} __packed; #define OPA_PARTITION_TABLE_BLK_SIZE 32 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c257aff7d32..b9cd06db1a71 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,6 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_mad.h> #include <rdma/rdmavt_mr.h> -#include <rdma/rdmavt_qp.h> #define RVT_MAX_PKEY_VALUES 16 @@ -72,6 +71,8 @@ struct trap_list { struct list_head list; }; +struct rvt_qp; +struct rvt_qpn_table; struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -206,6 +207,20 @@ struct rvt_ah { u8 log_pmtu; }; +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + u32 size; +}; + /* memory working set size */ struct rvt_wss { unsigned long *entries; @@ -501,16 +516,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct rvt_srq, ibsrq); -} - -static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct rvt_qp, ibqp); -} - static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* @@ -548,57 +553,6 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } -/** - * rvt_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport - * @qpn: the QP number to look up - * - * The caller must hold the rcu_read_lock(), and keep the lock until - * the returned qp is no longer in use. 
- */ -/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ -static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, - struct rvt_ibport *rvp, - u32 qpn) __must_hold(RCU) -{ - struct rvt_qp *qp = NULL; - - if (unlikely(qpn <= 1)) { - qp = rcu_dereference(rvp->qp[qpn]); - } else { - u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); - - for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) - break; - } - return qp; -} - -/** - * rvt_mod_retry_timer - mod a retry timer - * @qp - the QP - * @shift - timeout shift to wait for multiple packets - * Modify a potentially already running retry timer - */ -static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) -{ - struct ib_qp *ibqp = &qp->ibqp; - struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - - lockdep_assert_held(&qp->s_lock); - qp->s_flags |= RVT_S_TIMER; - /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + - (qp->timeout_jiffies << shift)); -} - -static inline void rvt_mod_retry_timer(struct rvt_qp *qp) -{ - return rvt_mod_retry_timer_ext(qp, 0); -} - struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0fbd4063fef..68e38c20afc0 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -83,7 +83,6 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available - * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available @@ -212,20 +211,6 @@ struct rvt_rq { }; /* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. */ @@ -399,6 +384,16 @@ struct rvt_srq { u32 limit; }; +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + #define RVT_QPN_MAX BIT(24) #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) @@ -678,6 +673,70 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. 
+ */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + +/** + * rvt_mod_retry_timer - mod a retry timer + * @qp - the QP + * @shift - timeout shift to wait for multiple packets + * Modify a potentially already running retry timer + */ +static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + lockdep_assert_held(&qp->s_lock); + qp->s_flags |= RVT_S_TIMER; + /* 4.096 usec. * (1 << qp->timeout) */ + mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + + (qp->timeout_jiffies << shift)); +} + +static inline void rvt_mod_retry_timer(struct rvt_qp *qp) +{ + return rvt_mod_retry_timer_ext(qp, 0); +} + +/** + * rvt_put_qp_swqe - drop refs held by swqe + * @qp: the send qp + * @wqe: the send wqe + * + * This drops any references held by the swqe + */ +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) +{ + rvt_put_swqe(wqe); + if (qp->allowed_ops == IB_OPCODE_UD) + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 794c47565971..05eabfd5d0d3 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -48,17 +48,15 @@ #define uobj_get_type(_attrs, _object) \ uapi_get_object((_attrs)->ufile->device->uapi, _object) -struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_read(_type, _id, _attrs) \ - _uobj_get_read(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_READ, \ + _attrs) #define ufd_get_read(_type, _fdnum, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ - UVERBS_LOOKUP_READ) + UVERBS_LOOKUP_READ, _attrs) static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { @@ -70,22 +68,19 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) ((struct ib_##_object *)_uobj_get_obj_read( \ uobj_get_read(_type, _id, _attrs))) -struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_write(_type, _id, _attrs) \ - _uobj_get_write(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \ + _attrs) int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - const struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *attrs); #define uobj_perform_destroy(_type, _id, _attrs) \ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ _uobj_check_id(_id), _attrs) struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, - const struct uverbs_attr_bundle *attrs); + u32 id, struct uverbs_attr_bundle *attrs); #define uobj_get_destroy(_type, _id, _attrs) \ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ @@ 
-109,30 +104,31 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj) +static inline int __must_check +uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - int ret = rdma_alloc_commit_uobject(uobj); + int ret = rdma_alloc_commit_uobject(uobj, attrs); if (ret) return ret; return 0; } -static inline void uobj_alloc_abort(struct ib_uobject *uobj) +static inline void uobj_alloc_abort(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { - rdma_alloc_abort_uobject(uobj); + rdma_alloc_abort_uobject(uobj, attrs); } static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile); + struct ib_uobject *uobj = + rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); - if (!IS_ERR(uobj)) { - *ib_dev = uobj->context->device; - attrs->context = uobj->context; - } + if (!IS_ERR(uobj)) + *ib_dev = attrs->context->device; return uobj; } diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 175d761695e1..d57a5ba00c74 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -95,7 +95,8 @@ struct uverbs_obj_type_class { void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ int __must_check (*destroy_hw)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); u8 needs_kfree_rcu; }; @@ -126,18 +127,23 @@ struct uverbs_obj_idr_type { * completely unchanged. */ int __must_check (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, - enum rdma_lookup_mode mode); + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs); void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile); -void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); + struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *attrs); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); struct uverbs_obj_fd_type { /* diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h new file mode 100644 index 000000000000..59363a083ecb --- /dev/null +++ b/include/trace/events/ib_mad.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_mad + +#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_MAD_H + +#include <linux/tracepoint.h> +#include <rdma/ib_mad.h> + +#ifdef CONFIG_TRACEPOINTS +struct trace_event_raw_ib_mad_send_template; +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry); +#endif + +DECLARE_EVENT_CLASS(ib_mad_send_template, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u64, wrtid) + __field(u64, tid) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + __field(u32, dlid) + __field(u32, rqpn) + __field(u32, rqkey) + __field(u32, dev_index) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; + __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->length = wr->send_buf.hdr_len + + wr->send_buf.data_len; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->class_specific = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific; + __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid; + __entry->attr_id = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id; + __entry->attr_mod = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod; + create_mad_addr_info(wr, qp_info, __entry); + ), + + TP_printk("%d:%d QP%d agent %p: " \ + "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ + "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ + "pkey 0x%x rpqn 0x%x rqpkey 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, + __entry->method, be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, + __entry->rqpn, __entry->rqkey + ) +); + +DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad, 
+ TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); + +TRACE_EVENT(ib_mad_send_done_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc), + TP_ARGS(wr, wc), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u32, qp_num) + __field(u64, wrtid) + __field(u16, status) + __field(u16, wc_status) + __field(u32, length) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(u32, dev_index) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u8, method) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; + __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->wc_status = wc->status; + __entry->length = wc->byte_len; + ), + + TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \ + "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status) + ) +); + +TRACE_EVENT(ib_mad_recv_done_handler, + TP_PROTO(struct ib_mad_qp_info *qp_info, struct ib_wc *wc, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(qp_info, wc, mad_hdr), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + __field(u64, tid) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u16, src_qp) + __field(u16, wc_status) + __field(u32, slid) + __field(u32, dev_index) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = qp_info->port_priv->device->index; + __entry->port_num = qp_info->port_priv->port_num; + __entry->qp_num = qp_info->qp->qp_num; + __entry->length = wc->byte_len; + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + __entry->slid = wc->slid; + __entry->src_qp 
= wc->src_qp; + __entry->sl = wc->sl; + ib_query_pkey(qp_info->port_priv->device, + qp_info->port_priv->port_num, + wc->pkey_index, &__entry->pkey); + __entry->wc_status = wc->status; + ), + + TP_printk("%d:%d QP%d : RECV WC Status %d : length %d : hdr : " \ + "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ + "method 0x%02x status 0x%04x class_specific 0x%04x " \ + "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ + "slid 0x%08x src QP%d, sl %d pkey 0x%04x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey + ) +); + +DECLARE_EVENT_CLASS(ib_mad_agent_template, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent), + + TP_STRUCT__entry( + __field(u32, dev_index) + __field(u32, hi_tid) + __field(u8, port_num) + __field(u8, mgmt_class) + __field(u8, mgmt_class_version) + ), + + TP_fast_assign( + __entry->dev_index = agent->agent.device->index; + __entry->port_num = agent->agent.port_num; + __entry->hi_tid = agent->agent.hi_tid; + + if (agent->reg_req) { + __entry->mgmt_class = agent->reg_req->mgmt_class; + __entry->mgmt_class_version = + agent->reg_req->mgmt_class_version; + } else { + __entry->mgmt_class = 0; + __entry->mgmt_class_version = 0; + } + ), + + TP_printk("%d:%d mad agent : hi_tid 0x%08x class 0x%02x class_ver 0x%02x", + __entry->dev_index, __entry->port_num, + __entry->hi_tid, __entry->mgmt_class, + __entry->mgmt_class_version + ) +); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_recv_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_send_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_create_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); + + + +DECLARE_EVENT_CLASS(ib_mad_opa_smi_template, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, OPA_SMP_MAX_PATH_HOPS) + __array(u8, return_path, OPA_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->route.dr.dr_slid; + __entry->dr_dlid = smp->route.dr.dr_dlid; + memcpy(__entry->initial_path, smp->route.dr.initial_path, + OPA_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->route.dr.return_path, + OPA_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%08x dr_dlid 0x%08x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be32_to_cpu(__entry->dr_slid), + be32_to_cpu(__entry->dr_dlid), + OPA_SMP_MAX_PATH_HOPS, __entry->initial_path, + OPA_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_opa_smi, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_out_opa_smi, + 
TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); + + +DECLARE_EVENT_CLASS(ib_mad_opa_ib_template, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, IB_SMP_MAX_PATH_HOPS) + __array(u8, return_path, IB_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->dr_slid; + __entry->dr_dlid = smp->dr_dlid; + memcpy(__entry->initial_path, smp->initial_path, + IB_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->return_path, + IB_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%04x dr_dlid 0x%04x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be16_to_cpu(__entry->dr_slid), + be16_to_cpu(__entry->dr_dlid), + IB_SMP_MAX_PATH_HOPS, __entry->initial_path, + IB_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_out_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); + +#endif /* _TRACE_IB_MAD_H */ + +#include <trace/define_trace.h> diff --git a/include/trace/events/ib_umad.h b/include/trace/events/ib_umad.h new file mode 100644 index 000000000000..c393a19a0f60 --- /dev/null +++ b/include/trace/events/ib_umad.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_umad + +#if !defined(_TRACE_IB_UMAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_UMAD_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(ib_umad_template, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, sl) + __field(u8, path_bits) + __field(u8, grh_present) + __field(u32, id) + __field(u32, status) + __field(u32, timeout_ms) + __field(u32, retires) + __field(u32, length) + __field(u32, qpn) + __field(u32, qkey) + __field(u8, gid_index) + __field(u8, hop_limit) + __field(u16, lid) + __field(u16, attr_id) + __field(u16, pkey_index) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, method) + __field(u32, flow_label) + __field(u16, mad_status) + __field(u16, class_specific) + __field(u32, attr_mod) + __field(u64, tid) + __array(u8, gid, 16) + __field(u32, dev_index) + __field(u8, traffic_class) + ), + + TP_fast_assign( + __entry->dev_index = file->port->ib_dev->index; + __entry->port_num = file->port->port_num; + + __entry->id = umad_hdr->id; + __entry->status = umad_hdr->status; + __entry->timeout_ms = umad_hdr->timeout_ms; + __entry->retires = umad_hdr->retries; + __entry->length = umad_hdr->length; + __entry->qpn = umad_hdr->qpn; + __entry->qkey = umad_hdr->qkey; + __entry->lid = umad_hdr->lid; + __entry->sl = umad_hdr->sl; + __entry->path_bits = umad_hdr->path_bits; + __entry->grh_present = umad_hdr->grh_present; + __entry->gid_index = umad_hdr->gid_index; + __entry->hop_limit = umad_hdr->hop_limit; + __entry->traffic_class = umad_hdr->traffic_class; + memcpy(__entry->gid, umad_hdr->gid, sizeof(umad_hdr->gid)); + 
__entry->flow_label = umad_hdr->flow_label; + __entry->pkey_index = umad_hdr->pkey_index; + + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->mad_status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + ), + + TP_printk("%d:%d umad_hdr: id 0x%08x status 0x%08x ms %u ret %u " \ + "len %u QP%u qkey 0x%08x lid 0x%04x sl %u path_bits 0x%x " \ + "grh 0x%x gidi %u hop_lim %u traf_cl %u gid %pI6c " \ + "flow 0x%08x pkeyi %u MAD: base_ver 0x%x class 0x%x " \ + "class_ver 0x%x method 0x%x status 0x%04x " \ + "class_specific 0x%04x tid 0x%016llx attr_id 0x%04x " \ + "attr_mod 0x%08x ", + __entry->dev_index, __entry->port_num, + __entry->id, __entry->status, __entry->timeout_ms, + __entry->retires, __entry->length, be32_to_cpu(__entry->qpn), + be32_to_cpu(__entry->qkey), be16_to_cpu(__entry->lid), + __entry->sl, __entry->path_bits, __entry->grh_present, + __entry->gid_index, __entry->hop_limit, + __entry->traffic_class, &__entry->gid, + be32_to_cpu(__entry->flow_label), __entry->pkey_index, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->mad_status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod) + ) +); + +DEFINE_EVENT(ib_umad_template, ib_umad_write, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_recv, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_send, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +#endif /* _TRACE_IB_UMAD_H */ + +#include <trace/define_trace.h> diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h new file mode 100644 index 000000000000..9599a2a62be8 --- /dev/null +++ b/include/uapi/rdma/efa-abi.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef EFA_ABI_USER_H +#define EFA_ABI_USER_H + +#include <linux/types.h> + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define EFA_UVERBS_ABI_VERSION 1 + +/* + * Keep structs aligned to 8 bytes. + * Keep reserved fields as arrays of __u8 named reserved_XXX where XXX is the + * hex bit offset of the field. 
+ */ + +enum efa_ibv_user_cmds_supp_udata { + EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0, + EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1, +}; + +struct efa_ibv_alloc_ucontext_resp { + __u32 comp_mask; + __u32 cmds_supp_udata_mask; + __u16 sub_cqs_per_cq; + __u16 inline_buf_size; + __u32 max_llq_size; /* bytes */ +}; + +struct efa_ibv_alloc_pd_resp { + __u32 comp_mask; + __u16 pdn; + __u8 reserved_30[2]; +}; + +struct efa_ibv_create_cq { + __u32 comp_mask; + __u32 cq_entry_size; + __u16 num_sub_cqs; + __u8 reserved_50[6]; +}; + +struct efa_ibv_create_cq_resp { + __u32 comp_mask; + __u8 reserved_20[4]; + __aligned_u64 q_mmap_key; + __aligned_u64 q_mmap_size; + __u16 cq_idx; + __u8 reserved_d0[6]; +}; + +enum { + EFA_QP_DRIVER_TYPE_SRD = 0, +}; + +struct efa_ibv_create_qp { + __u32 comp_mask; + __u32 rq_ring_size; /* bytes */ + __u32 sq_ring_size; /* bytes */ + __u32 driver_qp_type; +}; + +struct efa_ibv_create_qp_resp { + __u32 comp_mask; + /* the offset inside the page of the rq db */ + __u32 rq_db_offset; + /* the offset inside the page of the sq db */ + __u32 sq_db_offset; + /* the offset inside the page of descriptors buffer */ + __u32 llq_desc_offset; + __aligned_u64 rq_mmap_key; + __aligned_u64 rq_mmap_size; + __aligned_u64 rq_db_mmap_key; + __aligned_u64 sq_db_mmap_key; + __aligned_u64 llq_desc_mmap_key; + __u16 send_sub_cq_idx; + __u16 recv_sub_cq_idx; + __u8 reserved_1e0[4]; +}; + +struct efa_ibv_create_ah_resp { + __u32 comp_mask; + __u16 efa_address_handle; + __u8 reserved_30[2]; +}; + +struct efa_ibv_ex_query_device_resp { + __u32 comp_mask; + __u32 max_sq_wr; + __u32 max_rq_wr; + __u16 max_sq_sge; + __u16 max_rq_sge; +}; + +#endif /* EFA_ABI_USER_H */ diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index f4d4010b7e3e..624f5b53eb1f 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -360,6 +360,7 @@ enum mlx5_ib_create_qp_resp_mask { MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1, MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2, MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3, + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR = 1UL << 4, }; struct mlx5_ib_create_qp_resp { @@ -371,6 +372,7 @@ struct mlx5_ib_create_qp_resp { __u32 rqn; __u32 sqn; __u32 reserved1; + __u64 tir_icm_addr; }; struct mlx5_ib_alloc_mw { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 8149d224030b..d404c951954c 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -44,6 +44,7 @@ enum mlx5_ib_create_flow_action_attrs { enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, }; enum mlx5_ib_devx_methods { @@ -144,6 +145,7 @@ enum mlx5_ib_flow_matcher_create_attrs { MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, }; enum mlx5_ib_flow_matcher_destroy_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 4a701033b93f..a8f34c237458 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -42,6 +42,7 @@ enum mlx5_ib_uapi_flow_action_flags { enum mlx5_ib_uapi_flow_table_type { MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, + MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2, }; enum 
mlx5_ib_uapi_flow_action_packet_reformat_type { @@ -56,5 +57,11 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr { __u8 out_data[]; }; +enum mlx5_ib_uapi_dm_type { + MLX5_IB_UAPI_DM_TYPE_MEMIC, + MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM, + MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM, +}; + #endif diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5cc592728071..42a8bdc40a14 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -49,17 +49,6 @@ enum { RDMA_NL_IWPM_NUM_OPS }; -struct rdma_cm_id_stats { - __u32 qp_num; - __u32 bound_dev_if; - __u32 port_space; - __s32 pid; - __u8 cm_state; - __u8 node_type; - __u8 port_num; - __u8 qp_type; -}; - enum { IWPM_NLA_REG_PID_UNSPEC = 0, IWPM_NLA_REG_PID_SEQ, @@ -261,7 +250,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_PORT_GET, /* can dump */ - /* 6 - 8 are free to use */ + RDMA_NLDEV_CMD_SYS_GET, /* can dump */ + RDMA_NLDEV_CMD_SYS_SET, + + /* 8 is free to use */ RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */ @@ -473,6 +465,21 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ /* + * net namespace mode for rdma subsystem: + * either shared or exclusive among multiple net namespaces. + */ + RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ + /* + * Device protocol, e.g. ib, iw, usnic, roce and opa + */ + RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */ + + /* + * File descriptor handle of the net namespace object + */ + RDMA_NLDEV_NET_NS_FD, /* u32 */ + + /* * Always the end */ RDMA_NLDEV_ATTR_MAX diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 06c34d99be85..26213f49f5c8 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -102,6 +102,7 @@ enum rdma_driver_id { RDMA_DRIVER_RXE, RDMA_DRIVER_HFI1, RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, }; #endif diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7bdf98c37e91..8a16c2d498e9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -37,6 +37,8 @@ #include <linux/device.h> #include <linux/netdevice.h> +#include <rdma/ib_verbs.h> + extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -636,6 +638,41 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif +#if IS_ENABLED(CONFIG_INFINIBAND) + +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (ibdev && ibdev->dev.parent) { + char buf[PREFIX_SIZE]; + + dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, + "%s%s %s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + &vaf); + } else if (ibdev) { + printk(KERN_DEBUG "%s: %pV", dev_name(&ibdev->dev), &vaf); + } else { + printk(KERN_DEBUG "(NULL ib_device): %pV", &vaf); + } + + va_end(args); +} +EXPORT_SYMBOL(__dynamic_ibdev_dbg); + +#endif + #define DDEBUG_STRING_SIZE 1024 static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 53f429c04843..d14ca4af6f94 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -146,18 +146,13 @@ out: static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { const struct ib_gid_attr *attr; - int rc = 0; + int rc; attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); if (IS_ERR(attr)) return -ENODEV; - if (attr->ndev) - memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr, - ETH_ALEN); - else - rc = -ENODEV; - + rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]); rdma_put_gid_attr(attr); return rc; } @@ -185,6 +180,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index) { const struct ib_gid_attr *attr; + const struct net_device *ndev; int i; for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { @@ -192,11 +188,14 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, if (IS_ERR(attr)) continue; - if (attr->ndev && + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(attr); + if (!IS_ERR(ndev) && ((!vlan_id && !is_vlan_dev(attr->ndev)) || (vlan_id && is_vlan_dev(attr->ndev) && vlan_dev_vlan_id(attr->ndev) == vlan_id)) && attr->gid_type == IB_GID_TYPE_ROCE) { + rcu_read_unlock(); if (gid) memcpy(gid, &attr->gid, SMC_GID_SIZE); if (sgid_index) @@ -204,6 +203,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, rdma_put_gid_attr(attr); return 0; } + rcu_read_unlock(); rdma_put_gid_attr(attr); } return -ENODEV; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 65e667bdf979..4f0a1cdbfe7c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -52,6 +52,7 @@ hostprogs-y += xdpsock hostprogs-y += xdp_fwd hostprogs-y += task_fd_query hostprogs-y += xdp_sample_pkts +hostprogs-y += ibumad hostprogs-y += hbm # Libbpf dependencies @@ -108,6 +109,7 @@ xdpsock-objs := xdpsock_user.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs @@ -166,6 +168,7 @@ always += xdp_adjust_tail_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o +always += ibumad_kern.o always += hbm_out_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c new file mode 100644 index 000000000000..38b2b3f22049 --- /dev/null +++ b/samples/bpf/ibumad_kern.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/** + * ibumad BPF sample kernel side + * + * This program is free software; you can redistribute it and/or + * modify it 
under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Copyright(c) 2018 Ira Weiny, Intel Corporation + */ + +#define KBUILD_MODNAME "ibumad_count_pkts_by_class" +#include <uapi/linux/bpf.h> + +#include "bpf_helpers.h" + + +struct bpf_map_def SEC("maps") read_count = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), /* class; u32 required */ + .value_size = sizeof(u64), /* count of mads read */ + .max_entries = 256, /* Room for all Classes */ +}; + +struct bpf_map_def SEC("maps") write_count = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), /* class; u32 required */ + .value_size = sizeof(u64), /* count of mads written */ + .max_entries = 256, /* Room for all Classes */ +}; + +#undef DEBUG +#ifdef DEBUG +#define bpf_debug(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) +#else +#define bpf_debug(fmt, ...) +#endif + +/* Taken from the current format defined in + * include/trace/events/ib_umad.h + * and + * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format + * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format + */ +struct ib_umad_rw_args { + u64 pad; + u8 port_num; + u8 sl; + u8 path_bits; + u8 grh_present; + u32 id; + u32 status; + u32 timeout_ms; + u32 retires; + u32 length; + u32 qpn; + u32 qkey; + u8 gid_index; + u8 hop_limit; + u16 lid; + u16 attr_id; + u16 pkey_index; + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + u32 flow_label; + u16 mad_status; + u16 class_specific; + u32 attr_mod; + u64 tid; + u8 gid[16]; + u32 dev_index; + u8 traffic_class; +}; + +SEC("tracepoint/ib_umad/ib_umad_read_recv") +int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad read recv : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) { + bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} +SEC("tracepoint/ib_umad/ib_umad_read_send") +int on_ib_umad_read_send(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad read send : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) { + bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} +SEC("tracepoint/ib_umad/ib_umad_write") +int on_ib_umad_write(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad write : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&write_count, &class); + if (!val) { + bpf_map_update_elem(&write_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&write_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c new file mode 100644 index 000000000000..097d76143363 --- /dev/null +++ b/samples/bpf/ibumad_user.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/** + * ibumad BPF sample user side + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Copyright(c) 2018 Ira Weiny, Intel Corporation + */ + +#include <linux/bpf.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <limits.h> + +#include <sys/resource.h> +#include <getopt.h> +#include <net/if.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "bpf/libbpf.h" + +static void dump_counts(int fd) +{ + __u32 key; + __u64 value; + + for (key = 0; key < 256; key++) { + if (bpf_map_lookup_elem(fd, &key, &value)) { + printf("failed to read key %u\n", key); + continue; + } + if (value) + printf("0x%02x : %llu\n", key, value); + } +} + +static void dump_all_counts(void) +{ + printf("Read 'Class : count'\n"); + dump_counts(map_fd[0]); + printf("Write 'Class : count'\n"); + dump_counts(map_fd[1]); +} + +static void dump_exit(int sig) +{ + dump_all_counts(); + exit(0); +} + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"delay", required_argument, NULL, 'd'}, +}; + +static void usage(char *cmd) +{ + printf("eBPF test program to count packets from various IP addresses\n" + "Usage: %s <options>\n" + " --help, -h this menu\n" + " --delay, -d <delay> wait <delay> sec between prints [1 - 1000000]\n" + , cmd + ); +} + +int main(int argc, char **argv) +{ + unsigned long delay = 5; + int longindex = 0; + int opt; + char bpf_file[256]; + + /* Create the eBPF kernel code path name. + * This follows the pattern of all of the other bpf samples + */ + snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]); + + /* Do one final dump when exiting */ + signal(SIGINT, dump_exit); + signal(SIGTERM, dump_exit); + + while ((opt = getopt_long(argc, argv, "hd:rSw", + long_options, &longindex)) != -1) { + switch (opt) { + case 'd': + delay = strtoul(optarg, NULL, 0); + if (delay == ULONG_MAX || delay < 0 || + delay > 1000000) { + fprintf(stderr, "ERROR: invalid delay : %s\n", + optarg); + usage(argv[0]); + return 1; + } + break; + default: + case 'h': + usage(argv[0]); + return 1; + } + } + + if (load_bpf_file(bpf_file)) { + fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n", + bpf_file); + return 1; + } + + while (1) { + sleep(delay); + dump_all_counts(); + } + + return 0; +} |
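The recurring theme in the ib_verbs.h hunks above is the split of each destroy/dealloc verb into a *_user() variant that takes a struct ib_udata * plus a kernel inline wrapper that forwards NULL, so in-kernel ULP call sites stay unchanged while uverbs threads the udata through. The sketch below is not part of this commit: it is a minimal, hypothetical kernel-side consumer written against the post-series headers, illustrating the NULL-forwarding wrappers together with the new rdma_find_pg_bit() helper; the function name, device pointer and buffer address are illustrative only.

	/*
	 * Hypothetical kernel-side sketch against the post-series ib_verbs.h
	 * API; not from the commit, error handling trimmed to the essentials.
	 */
	#include <linux/err.h>
	#include <linux/sizes.h>
	#include <rdma/ib_verbs.h>

	static int example_setup(struct ib_device *ibdev, unsigned long buf_addr)
	{
		struct ib_pd *pd;
		struct ib_cq *cq;
		unsigned int pg_bit;

		/* Kernel wrappers forward a NULL udata to the *_user() variants. */
		pd = ib_alloc_pd(ibdev, 0);
		if (IS_ERR(pd))
			return PTR_ERR(pd);

		cq = ib_alloc_cq(ibdev, NULL, 16, 0, IB_POLL_SOFTIRQ);
		if (IS_ERR(cq)) {
			ib_dealloc_pd(pd);
			return PTR_ERR(cq);
		}

		/*
		 * rdma_find_pg_bit() picks the largest HW-supported page shift
		 * that still keeps buf_addr aligned: with a 4K|2M bitmap, a
		 * 2M-aligned address yields 21, anything less aligned falls
		 * back to 12 (the smallest supported size).
		 */
		pg_bit = rdma_find_pg_bit(buf_addr, SZ_4K | SZ_2M);
		pr_debug("using page shift %u\n", pg_bit);

		ib_free_cq(cq);
		ib_dealloc_pd(pd);
		return 0;
	}

The same pattern applies to ib_destroy_qp(), ib_destroy_srq(), ib_destroy_cq() and ib_dereg_mr() in the hunks above: kernel callers keep the short form, and only the uverbs destroy paths pass a real udata to the *_user() variants.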