Diffstat (limited to 'drivers/infiniband/core')
 drivers/infiniband/core/Makefile        |   1
 drivers/infiniband/core/cache.c         | 162
 drivers/infiniband/core/cgroup.c        |  62
 drivers/infiniband/core/cm.c            |   2
 drivers/infiniband/core/cma.c           | 177
 drivers/infiniband/core/cma_configfs.c  |  42
 drivers/infiniband/core/core_priv.h     |  33
 drivers/infiniband/core/cq.c            |   6
 drivers/infiniband/core/device.c        |  23
 drivers/infiniband/core/mad.c           |   4
 drivers/infiniband/core/roce_gid_mgmt.c |  28
 drivers/infiniband/core/sysfs.c         |   2
 drivers/infiniband/core/ucm.c           |   2
 drivers/infiniband/core/umem.c          |   6
 drivers/infiniband/core/umem_odp.c      |  94
 drivers/infiniband/core/umem_rbtree.c   |  21
 drivers/infiniband/core/user_mad.c      |   4
 drivers/infiniband/core/uverbs.h        |   1
 drivers/infiniband/core/uverbs_cmd.c    | 155
 drivers/infiniband/core/uverbs_main.c   |  22
 drivers/infiniband/core/verbs.c         |  38
 21 files changed, 695 insertions(+), 190 deletions(-)
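
Among the changes below, this series wires the IB core into the rdma cgroup controller: drivers/infiniband/core/cgroup.c adds ib_device_register_rdmacg()/ib_device_unregister_rdmacg() plus the ib_rdmacg_try_charge()/ib_rdmacg_uncharge() helpers, and uverbs_cmd.c brackets every verbs object it creates with a charge and a matching uncharge. The sketch below is illustrative only and shows that pattern in isolation; the function name and the create_hw_object() step are hypothetical, while the ib_rdmacg_* helpers and RDMACG_RESOURCE_HCA_OBJECT come from this patch.

/*
 * Illustrative sketch only, not part of the patch.  Assumes
 * <rdma/ib_verbs.h> and the declarations this series adds to
 * drivers/infiniband/core/core_priv.h.
 */
static int example_create_verbs_object(struct ib_uobject *uobj,
				       struct ib_device *ib_dev)
{
	int ret;

	/* Charge the owning rdma cgroup before creating the HW object. */
	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
				   RDMACG_RESOURCE_HCA_OBJECT);
	if (ret)
		return ret;		/* cgroup limit exceeded */

	ret = create_hw_object(ib_dev);	/* hypothetical allocation step */
	if (ret)
		/* Roll the accounting back if the allocation fails. */
		ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
				   RDMACG_RESOURCE_HCA_OBJECT);
	return ret;
}

The destroy paths and ib_uverbs_cleanup_ucontext() uncharge the same way once an object is released, and when CONFIG_CGROUP_RDMA is not set the static inline stubs in core_priv.h turn both calls into no-ops.
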
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index edaae9f9853c..e426ac877d19 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -13,6 +13,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o +ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o ib_cm-y := cm.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index ae04826e82fc..b1371eb9f46c 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -314,14 +314,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid) int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; int ret = 0; struct net_device *idev; int empty; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; if (!memcmp(gid, &zgid, sizeof(*gid))) return -EINVAL; @@ -369,11 +368,10 @@ out_unlock: int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); write_lock_irq(&table->rwlock); @@ -399,12 +397,11 @@ out_unlock: int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, struct net_device *ndev) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; bool deleted = false; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); write_lock_irq(&table->rwlock); @@ -428,10 +425,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, union ib_gid *gid, struct ib_gid_attr *attr) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; if (index < 0 || index >= table->sz) return -EINVAL; @@ -455,14 +451,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev, unsigned long mask, u8 *port, u16 *index) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; u8 p; int local_index; unsigned long flags; for (p = 0; p < ib_dev->phys_port_cnt; p++) { - table = ports_table[p]; + table = ib_dev->cache.ports[p].gid; read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, val, false, mask, NULL); if (local_index >= 0) { @@ -503,18 +498,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, u16 *index) { int local_index; - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; unsigned long flags; - if (port < rdma_start_port(ib_dev) || - port > rdma_end_port(ib_dev)) + if (!rdma_is_port_valid(ib_dev, port)) return -ENOENT; - table = 
ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; @@ -562,21 +555,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, void *context, u16 *index) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; unsigned int i; unsigned long flags; bool found = false; - if (!ports_table) - return -EOPNOTSUPP; - if (port < rdma_start_port(ib_dev) || - port > rdma_end_port(ib_dev) || + if (!rdma_is_port_valid(ib_dev, port) || !rdma_protocol_roce(ib_dev, port)) return -EPROTONOSUPPORT; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { @@ -668,14 +657,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode) { - struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; union ib_gid gid; struct ib_gid_attr gid_attr; struct ib_gid_attr zattr_type = zattr; struct ib_gid_table *table; unsigned int gid_type; - table = ports_table[port - rdma_start_port(ib_dev)]; + table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; make_default_gid(ndev, &gid); memset(&gid_attr, 0, sizeof(gid_attr)); @@ -766,71 +754,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, static int _gid_table_setup_one(struct ib_device *ib_dev) { u8 port; - struct ib_gid_table **table; + struct ib_gid_table *table; int err = 0; - table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; - for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); - table[port] = + table = alloc_gid_table( ib_dev->port_immutable[rdma_port].gid_tbl_len); - if (!table[port]) { + if (!table) { err = -ENOMEM; goto rollback_table_setup; } err = gid_table_reserve_default(ib_dev, port + rdma_start_port(ib_dev), - table[port]); + table); if (err) goto rollback_table_setup; + ib_dev->cache.ports[port].gid = table; } - ib_dev->cache.gid_cache = table; return 0; rollback_table_setup: for (port = 0; port < ib_dev->phys_port_cnt; port++) { + table = ib_dev->cache.ports[port].gid; + cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), - table[port]); - release_gid_table(table[port]); + table); + release_gid_table(table); } - kfree(table); return err; } static void gid_table_release_one(struct ib_device *ib_dev) { - struct ib_gid_table **table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; u8 port; - if (!table) - return; - - for (port = 0; port < ib_dev->phys_port_cnt; port++) - release_gid_table(table[port]); - - kfree(table); - ib_dev->cache.gid_cache = NULL; + for (port = 0; port < ib_dev->phys_port_cnt; port++) { + table = ib_dev->cache.ports[port].gid; + release_gid_table(table); + ib_dev->cache.ports[port].gid = NULL; + } } static void gid_table_cleanup_one(struct ib_device *ib_dev) { - struct ib_gid_table **table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; u8 port; - if (!table) - return; - - for (port = 0; port < ib_dev->phys_port_cnt; port++) + for (port = 0; port < ib_dev->phys_port_cnt; port++) { + table = ib_dev->cache.ports[port].gid; cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), - table[port]); + table); + } } static int gid_table_setup_one(struct ib_device *ib_dev) @@ -860,12 +841,12 @@ int 
ib_get_cached_gid(struct ib_device *device, { int res; unsigned long flags; - struct ib_gid_table **ports_table = device->cache.gid_cache; - struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)]; + struct ib_gid_table *table; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; + table = device->cache.ports[port_num - rdma_start_port(device)].gid; read_lock_irqsave(&table->rwlock, flags); res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); read_unlock_irqrestore(&table->rwlock, flags); @@ -912,12 +893,12 @@ int ib_get_cached_pkey(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; + cache = device->cache.ports[port_num - rdma_start_port(device)].pkey; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -941,12 +922,12 @@ int ib_find_cached_pkey(struct ib_device *device, int ret = -ENOENT; int partial_ix = -1; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; + cache = device->cache.ports[port_num - rdma_start_port(device)].pkey; *index = -1; @@ -981,12 +962,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device, int i; int ret = -ENOENT; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; + cache = device->cache.ports[port_num - rdma_start_port(device)].pkey; *index = -1; @@ -1010,17 +991,36 @@ int ib_get_cached_lmc(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; + *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc; read_unlock_irqrestore(&device->cache.lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_lmc); +int ib_get_cached_port_state(struct ib_device *device, + u8 port_num, + enum ib_port_state *port_state) +{ + unsigned long flags; + int ret = 0; + + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + *port_state = device->cache.ports[port_num + - rdma_start_port(device)].port_state; + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_port_state); + static void ib_cache_update(struct ib_device *device, u8 port) { @@ -1033,14 +1033,13 @@ static void ib_cache_update(struct ib_device *device, int i; int ret; struct ib_gid_table *table; - struct ib_gid_table **ports_table = device->cache.gid_cache; bool use_roce_gid_table = rdma_cap_roce_gid_table(device, port); - if (port < rdma_start_port(device) || port > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port)) return; - table = ports_table[port - rdma_start_port(device)]; + table = 
device->cache.ports[port - rdma_start_port(device)].gid; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) @@ -1092,9 +1091,10 @@ static void ib_cache_update(struct ib_device *device, write_lock_irq(&device->cache.lock); - old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; + old_pkey_cache = device->cache.ports[port - + rdma_start_port(device)].pkey; - device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; + device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache; if (!use_roce_gid_table) { write_lock(&table->rwlock); for (i = 0; i < gid_cache->table_len; i++) { @@ -1104,7 +1104,9 @@ static void ib_cache_update(struct ib_device *device, write_unlock(&table->rwlock); } - device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; + device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc; + device->cache.ports[port - rdma_start_port(device)].port_state = + tprops->state; write_unlock_irq(&device->cache.lock); @@ -1157,22 +1159,17 @@ int ib_cache_setup_one(struct ib_device *device) rwlock_init(&device->cache.lock); - device->cache.pkey_cache = - kzalloc(sizeof *device->cache.pkey_cache * + device->cache.ports = + kzalloc(sizeof(*device->cache.ports) * (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); - device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * - (rdma_end_port(device) - - rdma_start_port(device) + 1), - GFP_KERNEL); - if (!device->cache.pkey_cache || - !device->cache.lmc_cache) { + if (!device->cache.ports) { err = -ENOMEM; - goto free; + goto out; } err = gid_table_setup_one(device); if (err) - goto free; + goto out; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); @@ -1187,9 +1184,7 @@ int ib_cache_setup_one(struct ib_device *device) err: gid_table_cleanup_one(device); -free: - kfree(device->cache.pkey_cache); - kfree(device->cache.lmc_cache); +out: return err; } @@ -1203,14 +1198,11 @@ void ib_cache_release_one(struct ib_device *device) * all the device's resources when the cache could no * longer be accessed. */ - if (device->cache.pkey_cache) - for (p = 0; - p <= rdma_end_port(device) - rdma_start_port(device); ++p) - kfree(device->cache.pkey_cache[p]); + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) + kfree(device->cache.ports[p].pkey); gid_table_release_one(device); - kfree(device->cache.pkey_cache); - kfree(device->cache.lmc_cache); + kfree(device->cache.ports); } void ib_cache_cleanup_one(struct ib_device *device) diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c new file mode 100644 index 000000000000..126ac5f99db7 --- /dev/null +++ b/drivers/infiniband/core/cgroup.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "core_priv.h" + +/** + * ib_device_register_rdmacg - register with rdma cgroup. + * @device: device to register to participate in resource + * accounting by rdma cgroup. + * + * Register with the rdma cgroup. 
Should be called before + * exposing rdma device to user space applications to avoid + * resource accounting leak. + * Returns 0 on success or otherwise failure code. + */ +int ib_device_register_rdmacg(struct ib_device *device) +{ + device->cg_device.name = device->name; + return rdmacg_register_device(&device->cg_device); +} + +/** + * ib_device_unregister_rdmacg - unregister with rdma cgroup. + * @device: device to unregister. + * + * Unregister with the rdma cgroup. Should be called after + * all the resources are deallocated, and after a stage when any + * other resource allocation by user application cannot be done + * for this device to avoid any leak in accounting. + */ +void ib_device_unregister_rdmacg(struct ib_device *device) +{ + rdmacg_unregister_device(&device->cg_device); +} + +int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index) +{ + return rdmacg_try_charge(&cg_obj->cg, &device->cg_device, + resource_index); +} +EXPORT_SYMBOL(ib_rdmacg_try_charge); + +void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index) +{ + rdmacg_uncharge(cg_obj->cg, &device->cg_device, + resource_index); +} +EXPORT_SYMBOL(ib_rdmacg_uncharge); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index cf1edfa1cbac..6535f09dc575 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3409,6 +3409,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; + pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n", + state, ib_wc_status_msg(wc_status)); switch (state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3e70a9c5d79d..acd10d666f1c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -198,6 +198,7 @@ struct cma_device { atomic_t refcount; struct list_head id_list; enum ib_gid_type *default_gid_type; + u8 *default_roce_tos; }; struct rdma_bind_list { @@ -269,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, int cma_get_default_gid_type(struct cma_device *cma_dev, unsigned int port) { - if (port < rdma_start_port(cma_dev->device) || - port > rdma_end_port(cma_dev->device)) + if (!rdma_is_port_valid(cma_dev->device, port)) return -EINVAL; return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; @@ -282,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev, { unsigned long supported_gids; - if (port < rdma_start_port(cma_dev->device) || - port > rdma_end_port(cma_dev->device)) + if (!rdma_is_port_valid(cma_dev->device, port)) return -EINVAL; supported_gids = roce_gid_type_mask_support(cma_dev->device, port); @@ -297,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev, return 0; } +int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port) +{ + if (!rdma_is_port_valid(cma_dev->device, port)) + return -EINVAL; + + return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)]; +} + +int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port, + u8 default_roce_tos) +{ + if (!rdma_is_port_valid(cma_dev->device, port)) + return -EINVAL; + + cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] = + default_roce_tos; + + return 0; +} struct ib_device *cma_get_ib_dev(struct 
cma_device *cma_dev) { return cma_dev->device; @@ -343,6 +361,7 @@ struct rdma_id_private { u32 options; u8 srq; u8 tos; + bool tos_set; u8 reuseaddr; u8 afonly; enum ib_gid_type gid_type; @@ -709,6 +728,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) union ib_gid gid, sgid, *dgid; u16 pkey, index; u8 p; + enum ib_port_state port_state; int i; cma_dev = NULL; @@ -724,6 +744,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; + if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) + continue; for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid, NULL); i++) { @@ -735,7 +757,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) } if (!cma_dev && (gid.global.subnet_prefix == - dgid->global.subnet_prefix)) { + dgid->global.subnet_prefix) && + port_state == IB_PORT_ACTIVE) { cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; @@ -778,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net, id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; + id_priv->tos_set = false; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); @@ -1689,6 +1713,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) return 0; reject: + pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret); cma_modify_qp_err(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); @@ -1760,6 +1785,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: + pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id, + ib_event->param.rej_rcvd.reason)); cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; @@ -2266,6 +2293,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos) id_priv = container_of(id, struct rdma_id_private, id); id_priv->tos = (u8) tos; + id_priv->tos_set = true; } EXPORT_SYMBOL(rdma_set_service_type); @@ -2285,6 +2313,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; + pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", + status); } queue_work(cma_wq, &work->work); @@ -2467,14 +2497,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) struct net_device *dev; prio = rt_tos2priority(tos); - dev = ndev->priv_flags & IFF_802_1Q_VLAN ? - vlan_dev_real_dev(ndev) : ndev; - + dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); #if IS_ENABLED(CONFIG_VLAN_8021Q) - if (ndev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(ndev)) return (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; #endif @@ -2500,6 +2528,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct cma_work *work; int ret; struct net_device *ndev = NULL; + u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - + rdma_start_port(id_priv->cma_dev->device)]; + u8 tos = id_priv->tos_set ? 
id_priv->tos : default_roce_tos; work = kzalloc(sizeof *work, GFP_KERNEL); @@ -2573,7 +2604,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; - route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); + route->path_rec->sl = iboe_tos_to_sl(ndev, tos); + route->path_rec->traffic_class = tos; route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); @@ -2652,8 +2684,8 @@ static void cma_set_loopback(struct sockaddr *addr) static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev, *cur_dev; - struct ib_port_attr port_attr; union ib_gid gid; + enum ib_port_state port_state; u16 pkey; int ret; u8 p; @@ -2669,8 +2701,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) cma_dev = cur_dev; for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { - if (!ib_query_port(cur_dev->device, p, &port_attr) && - port_attr.state == IB_PORT_ACTIVE) { + if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && + port_state == IB_PORT_ACTIVE) { cma_dev = cur_dev; goto port_found; } @@ -2720,8 +2752,14 @@ static void addr_handler(int status, struct sockaddr *src_addr, goto out; memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); - if (!status && !id_priv->cma_dev) + if (!status && !id_priv->cma_dev) { status = cma_acquire_dev(id_priv, NULL); + if (status) + pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", + status); + } else { + pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); + } if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, @@ -2833,20 +2871,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } atomic_inc(&id_priv->refcount); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { @@ -2962,6 +3006,43 @@ err: return ret == -ENOSPC ? 
-EADDRNOTAVAIL : ret; } +static int cma_port_is_unique(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv) +{ + struct rdma_id_private *cur_id; + struct sockaddr *daddr = cma_dst_addr(id_priv); + struct sockaddr *saddr = cma_src_addr(id_priv); + __be16 dport = cma_port(daddr); + + hlist_for_each_entry(cur_id, &bind_list->owners, node) { + struct sockaddr *cur_daddr = cma_dst_addr(cur_id); + struct sockaddr *cur_saddr = cma_src_addr(cur_id); + __be16 cur_dport = cma_port(cur_daddr); + + if (id_priv == cur_id) + continue; + + /* different dest port -> unique */ + if (!cma_any_port(cur_daddr) && + (dport != cur_dport)) + continue; + + /* different src address -> unique */ + if (!cma_any_addr(saddr) && + !cma_any_addr(cur_saddr) && + cma_addr_cmp(saddr, cur_saddr)) + continue; + + /* different dst address -> unique */ + if (!cma_any_addr(cur_daddr) && + cma_addr_cmp(daddr, cur_daddr)) + continue; + + return -EADDRNOTAVAIL; + } + return 0; +} + static int cma_alloc_any_port(enum rdma_port_space ps, struct rdma_id_private *id_priv) { @@ -2974,9 +3055,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps, remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; retry: - if (last_used_port != rover && - !cma_ps_find(net, ps, (unsigned short)rover)) { - int ret = cma_alloc_port(ps, id_priv, rover); + if (last_used_port != rover) { + struct rdma_bind_list *bind_list; + int ret; + + bind_list = cma_ps_find(net, ps, (unsigned short)rover); + + if (!bind_list) { + ret = cma_alloc_port(ps, id_priv, rover); + } else { + ret = cma_port_is_unique(bind_list, id_priv); + if (!ret) + cma_bind_port(bind_list, id_priv); + } /* * Remember previously used port number in order to avoid * re-using same port immediately after it is closed. @@ -3205,6 +3296,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; + struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) @@ -3244,6 +3336,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (ret) goto err2; + daddr = cma_dst_addr(id_priv); + daddr->sa_family = addr->sa_family; + return 0; err2: if (id_priv->cma_dev) @@ -3308,10 +3403,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (rep->status != IB_SIDR_SUCCESS) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = ib_event->param.sidr_rep_rcvd.status; + pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n", + event.status); break; } ret = cma_set_qkey(id_priv, rep->qkey); if (ret) { + pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret); event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = ret; break; @@ -3583,6 +3681,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; + if (!conn_param) + return -EINVAL; + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; @@ -3760,10 +3861,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); + else + pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. 
status %d\n", + status); mutex_lock(&id_priv->qp_mutex); - if (!status && id_priv->id.qp) + if (!status && id_priv->id.qp) { status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, be16_to_cpu(multicast->rec.mlid)); + if (status) + pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n", + status); + } mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); @@ -4229,15 +4337,21 @@ static void cma_add_one(struct ib_device *device) cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_gid_type), GFP_KERNEL); - if (!cma_dev->default_gid_type) { - kfree(cma_dev); - return; - } + if (!cma_dev->default_gid_type) + goto free_cma_dev; + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) + goto free_gid_type; + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { supported_gids = roce_gid_type_mask_support(device, i); WARN_ON(!supported_gids); cma_dev->default_gid_type[i - rdma_start_port(device)] = find_first_bit(&supported_gids, BITS_PER_LONG); + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; } init_completion(&cma_dev->comp); @@ -4250,6 +4364,16 @@ static void cma_add_one(struct ib_device *device) list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); + + return; + +free_gid_type: + kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + + return; } static int cma_remove_id_dev(struct rdma_id_private *id_priv) @@ -4318,6 +4442,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data) mutex_unlock(&lock); cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); kfree(cma_dev->default_gid_type); kfree(cma_dev); } diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 41573df1d9fc..54076a3e8007 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item, CONFIGFS_ATTR(, default_roce_mode); +static ssize_t default_roce_tos_show(struct config_item *item, char *buf) +{ + struct cma_device *cma_dev; + struct cma_dev_port_group *group; + ssize_t ret; + u8 tos; + + ret = cma_configfs_params_get(item, &cma_dev, &group); + if (ret) + return ret; + + tos = cma_get_default_roce_tos(cma_dev, group->port_num); + cma_configfs_params_put(cma_dev); + + return sprintf(buf, "%u\n", tos); +} + +static ssize_t default_roce_tos_store(struct config_item *item, + const char *buf, size_t count) +{ + struct cma_device *cma_dev; + struct cma_dev_port_group *group; + ssize_t ret; + u8 tos; + + ret = kstrtou8(buf, 0, &tos); + if (ret) + return ret; + + ret = cma_configfs_params_get(item, &cma_dev, &group); + if (ret) + return ret; + + ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos); + cma_configfs_params_put(cma_dev); + + return ret ? 
ret : strnlen(buf, count); +} + +CONFIGFS_ATTR(, default_roce_tos); + static struct configfs_attribute *cma_configfs_attributes[] = { &attr_default_roce_mode, + &attr_default_roce_tos, NULL, }; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index d29372624f3a..cb7d372e4bdf 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/cgroup_rdma.h> #include <rdma/ib_verbs.h> @@ -62,6 +63,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev, int cma_set_default_gid_type(struct cma_device *cma_dev, unsigned int port, enum ib_gid_type default_gid_type); +int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port); +int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port, + u8 default_roce_tos); struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev); int ib_device_register_sysfs(struct ib_device *device, @@ -121,6 +125,35 @@ int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +#ifdef CONFIG_CGROUP_RDMA +int ib_device_register_rdmacg(struct ib_device *device); +void ib_device_unregister_rdmacg(struct ib_device *device); + +int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index); + +void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index); +#else +static inline int ib_device_register_rdmacg(struct ib_device *device) +{ return 0; } + +static inline void ib_device_unregister_rdmacg(struct ib_device *device) +{ } + +static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index) +{ return 0; } + +static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, + struct ib_device *device, + enum rdmacg_resource_type resource_index) +{ } +#endif + static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, struct net_device *upper) { diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a754fc727de5..e95510117a6d 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -58,8 +58,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget) * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different * context and does not ask for completion interrupts from the HCA. * - * Note: for compatibility reasons -1 can be passed in %budget for unlimited - * polling. Do not use this feature in new code, it will be removed soon. + * Note: do not pass -1 as %budget unless it is guaranteed that the number + * of completions that will be processed is small. */ int ib_process_cq_direct(struct ib_cq *cq, int budget) { @@ -120,7 +120,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) * * This is the proper interface to allocate a CQ for in-kernel users. A * CQ allocated with this interface will automatically be polled from the - * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id + * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id * to use this CQ abstraction. 
*/ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 571974cd3919..593d2ce6ec7c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -333,6 +333,15 @@ int ib_register_device(struct ib_device *device, int ret; struct ib_client *client; struct ib_udata uhw = {.outlen = 0, .inlen = 0}; + struct device *parent = device->dev.parent; + + WARN_ON_ONCE(!parent); + if (!device->dev.dma_ops) + device->dev.dma_ops = parent->dma_ops; + if (!device->dev.dma_mask) + device->dev.dma_mask = parent->dma_mask; + if (!device->dev.coherent_dma_mask) + device->dev.coherent_dma_mask = parent->coherent_dma_mask; mutex_lock(&device_mutex); @@ -360,10 +369,18 @@ int ib_register_device(struct ib_device *device, goto out; } + ret = ib_device_register_rdmacg(device); + if (ret) { + pr_warn("Couldn't register device with rdma cgroup\n"); + ib_cache_cleanup_one(device); + goto out; + } + memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); + ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); goto out; } @@ -372,6 +389,7 @@ int ib_register_device(struct ib_device *device, if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); + ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); goto out; } @@ -421,6 +439,7 @@ void ib_unregister_device(struct ib_device *device) mutex_unlock(&device_mutex); + ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); ib_cache_cleanup_one(device); @@ -659,7 +678,7 @@ int ib_query_port(struct ib_device *device, union ib_gid gid; int err; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; memset(port_attr, 0, sizeof(*port_attr)); @@ -825,7 +844,7 @@ int ib_modify_port(struct ib_device *device, if (!device->modify_port) return -ENOSYS; - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + if (!rdma_is_port_valid(device, port_num)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a009f7132c73..57f231f1c721 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -316,7 +316,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { - dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n"); + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid port %d\n", + port_num); ret = ERR_PTR(-ENODEV); goto error1; } diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 0621f4455732..db958d3207ef 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -144,7 +144,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - struct net_device *event_ndev = (struct net_device *)cookie; struct net_device *real_dev; int res; @@ -152,11 +151,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, return 0; rcu_read_lock(); - real_dev = rdma_vlan_dev_real_dev(event_ndev); 
+ real_dev = rdma_vlan_dev_real_dev(cookie); if (!real_dev) - real_dev = event_ndev; + real_dev = cookie; - res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) && + res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) && (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) & REQUIRED_BOND_STATES)) || real_dev == rdma_ndev); @@ -192,17 +191,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port, static int upper_device_filter(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - struct net_device *event_ndev = (struct net_device *)cookie; int res; if (!rdma_ndev) return 0; - if (rdma_ndev == event_ndev) + if (rdma_ndev == cookie) return 1; rcu_read_lock(); - res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev); + res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); rcu_read_unlock(); return res; @@ -379,18 +377,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port, static void add_netdev_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - struct net_device *event_ndev = (struct net_device *)cookie; - - enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev); - _add_netdev_ips(ib_dev, port, event_ndev); + enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); + _add_netdev_ips(ib_dev, port, cookie); } static void del_netdev_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - struct net_device *event_ndev = (struct net_device *)cookie; - - ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); + ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie); } static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, @@ -460,7 +454,7 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, u8 port, struct net_device *ndev)) { - struct net_device *ndev = (struct net_device *)cookie; + struct net_device *ndev = cookie; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); @@ -519,9 +513,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port, static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - struct net_device *event_ndev = (struct net_device *)cookie; - - bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev); + bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); } /* The following functions operate on all IB devices. 
netdevice_event and diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c1fb545e8d78..daadf3130c9f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1258,7 +1258,7 @@ int ib_device_register_sysfs(struct ib_device *device, int ret; int i; - device->dev.parent = device->dma_device; + WARN_ON_ONCE(!device->dev.parent); ret = dev_set_name(class_dev, "%s", device->name); if (ret) return ret; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index e0a995b85a2d..cc0d51fb06e3 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1290,7 +1290,7 @@ static void ib_ucm_add_one(struct ib_device *device) goto err; ucm_dev->dev.class = &cm_class; - ucm_dev->dev.parent = device->dma_device; + ucm_dev->dev.parent = device->dev.parent; ucm_dev->dev.devt = ucm_dev->cdev.dev; ucm_dev->dev.release = ib_ucm_release_dev; dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 4609b921f899..27f155d2df8d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -34,7 +34,8 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include <linux/export.h> #include <linux/hugetlb.h> #include <linux/slab.h> @@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_attrs |= DMA_ATTR_WRITE_BARRIER; - if (!size) - return ERR_PTR(-EINVAL); - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 6b079a31dced..cb2742b548bb 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -32,6 +32,8 @@ #include <linux/types.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/pid.h> #include <linux/slab.h> #include <linux/export.h> @@ -239,6 +241,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = { .invalidate_range_end = ib_umem_notifier_invalidate_range_end, }; +struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, + unsigned long addr, + size_t size) +{ + struct ib_umem *umem; + struct ib_umem_odp *odp_data; + int pages = size >> PAGE_SHIFT; + int ret; + + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + + umem->context = context; + umem->length = size; + umem->address = addr; + umem->page_size = PAGE_SIZE; + umem->writable = 1; + + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); + if (!odp_data) { + ret = -ENOMEM; + goto out_umem; + } + odp_data->umem = umem; + + mutex_init(&odp_data->umem_mutex); + init_completion(&odp_data->notifier_completion); + + odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list)); + if (!odp_data->page_list) { + ret = -ENOMEM; + goto out_odp_data; + } + + odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list)); + if (!odp_data->dma_list) { + ret = -ENOMEM; + goto out_page_list; + } + + down_write(&context->umem_rwsem); + context->odp_mrs_count++; + rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree); + if (likely(!atomic_read(&context->notifier_count))) + odp_data->mn_counters_active = true; + else + list_add(&odp_data->no_private_counters, + &context->no_private_counters); + 
up_write(&context->umem_rwsem); + + umem->odp_data = odp_data; + + return umem; + +out_page_list: + vfree(odp_data->page_list); +out_odp_data: + kfree(odp_data); +out_umem: + kfree(umem); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_alloc_odp_umem); + int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) { int ret_val; @@ -270,18 +337,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) init_completion(&umem->odp_data->notifier_completion); - umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * + if (ib_umem_num_pages(umem)) { + umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->page_list)); - if (!umem->odp_data->page_list) { - ret_val = -ENOMEM; - goto out_odp_data; - } + if (!umem->odp_data->page_list) { + ret_val = -ENOMEM; + goto out_odp_data; + } - umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * + umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->dma_list)); - if (!umem->odp_data->dma_list) { - ret_val = -ENOMEM; - goto out_page_list; + if (!umem->odp_data->dma_list) { + ret_val = -ENOMEM; + goto out_page_list; + } } /* @@ -466,6 +535,7 @@ static int ib_umem_odp_map_dma_single_page( } umem->odp_data->dma_list[page_index] = dma_addr | access_mask; umem->odp_data->page_list[page_index] = page; + umem->npages++; stored_page = 1; } else if (umem->odp_data->page_list[page_index] == page) { umem->odp_data->dma_list[page_index] |= access_mask; @@ -505,7 +575,8 @@ out: * for failure. * An -EAGAIN error code is returned when a concurrent mmu notifier prevents * the function from completing its task. - * + * An -ENOENT error code indicates that userspace process is being terminated + * and mm was already destroyed. * @umem: the umem to map and pin * @user_virt: the address from which we need to map. * @bcnt: the minimal number of bytes to pin and map. 
The mapping might be @@ -553,7 +624,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, owning_mm = get_task_mm(owning_process); if (owning_mm == NULL) { - ret = -EINVAL; + ret = -ENOENT; goto out_put_task; } @@ -665,6 +736,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, put_page(page); umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; + umem->npages--; } } mutex_unlock(&umem->odp_data->umem_mutex); diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c index 727d788448f5..d176597b4d78 100644 --- a/drivers/infiniband/core/umem_rbtree.c +++ b/drivers/infiniband/core/umem_rbtree.c @@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root, void *cookie) { int ret_val = 0; - struct umem_odp_node *node; + struct umem_odp_node *node, *next; struct ib_umem_odp *umem; if (unlikely(start == last)) return ret_val; - for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; - node = rbt_ib_umem_iter_next(node, start, last - 1)) { + for (node = rbt_ib_umem_iter_first(root, start, last - 1); + node; node = next) { + next = rbt_ib_umem_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem->umem, start, last, cookie) || ret_val; } return ret_val; } +EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); + +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, + u64 addr, u64 length) +{ + struct umem_odp_node *node; + + node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); + if (node) + return container_of(node, struct ib_umem_odp, interval_tree); + return NULL; + +} +EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 249b403b43a4..aca7ff7abedc 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1188,7 +1188,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(&port->cdev, base, 1)) goto err_cdev; - port->dev = device_create(umad_class, device->dma_device, + port->dev = device_create(umad_class, device->dev.parent, port->cdev.dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) @@ -1207,7 +1207,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; - port->sm_dev = device_create(umad_class, device->dma_device, + port->sm_dev = device_create(umad_class, device->dev.parent, port->sm_cdev.dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 455034ac994e..e1bedf0bac04 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; + struct ib_uverbs_flow_spec_action_tag flow_tag; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 700782203483..7b7a76e1279a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -316,6 +316,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_ucontext *ucontext; struct file *filp; + struct ib_rdmacg_object cg_obj; int ret; if (out_len < sizeof resp) @@ -335,13 +336,18 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, 
(unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); + ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + if (ret) + goto err; + ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); - goto err; + goto err_alloc; } ucontext->device = ib_dev; + ucontext->cg_obj = cg_obj; INIT_LIST_HEAD(&ucontext->pd_list); INIT_LIST_HEAD(&ucontext->mr_list); INIT_LIST_HEAD(&ucontext->mw_list); @@ -407,6 +413,9 @@ err_free: put_pid(ucontext->tgid); ib_dev->dealloc_ucontext(ucontext); +err_alloc: + ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + err: mutex_unlock(&file->mutex); return ret; @@ -561,6 +570,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &pd_lock_class); + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) { + kfree(uobj); + return ret; + } + down_write(&uobj->mutex); pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); @@ -605,6 +621,7 @@ err_idr: ib_dealloc_pd(pd); err: + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); put_uobj_write(uobj); return ret; } @@ -637,6 +654,8 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, if (ret) goto err_put; + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + uobj->live = 0; put_uobj_write(uobj); @@ -1006,6 +1025,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_put; } } + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto err_charge; mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &udata); @@ -1054,6 +1077,9 @@ err_unreg: ib_dereg_mr(mr); err_put: + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + +err_charge: put_pd_read(pd); err_free: @@ -1178,6 +1204,8 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (ret) return ret; + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); @@ -1226,6 +1254,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto err_charge; + mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); @@ -1271,6 +1304,9 @@ err_unalloc: uverbs_dealloc_mw(mw); err_put: + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + +err_charge: put_pd_read(pd); err_free: @@ -1306,6 +1342,8 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, if (ret) return ret; + ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); mutex_lock(&file->mutex); @@ -1405,6 +1443,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; + ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto err_charge; + cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); if (IS_ERR(cq)) { @@ -1452,6 +1495,10 @@ err_free: ib_destroy_cq(cq); err_file: + ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev, + RDMACG_RESOURCE_HCA_OBJECT); + +err_charge: if (ev_file) ib_uverbs_release_ucq(file, ev_file, obj); @@ -1732,6 
+1779,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
 	idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -1891,7 +1940,8 @@ static int create_qp(struct ib_uverbs_file *file,
 					  IB_QP_CREATE_CROSS_CHANNEL |
 					  IB_QP_CREATE_MANAGED_SEND |
 					  IB_QP_CREATE_MANAGED_RECV |
-					  IB_QP_CREATE_SCATTER_FCS)) {
+					  IB_QP_CREATE_SCATTER_FCS |
+					  IB_QP_CREATE_CVLAN_STRIPPING)) {
 			ret = -EINVAL;
 			goto err_put;
 		}
@@ -1904,6 +1954,11 @@ static int create_qp(struct ib_uverbs_file *file,
 			goto err_put;
 		}
 
+	ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
+	if (ret)
+		goto err_put;
+
 	if (cmd->qp_type == IB_QPT_XRC_TGT)
 		qp = ib_create_qp(pd, &attr);
 	else
@@ -1911,7 +1966,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
-		goto err_put;
+		goto err_create;
 	}
 
 	if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1992,6 +2047,10 @@ err_cb:
 err_destroy:
 	ib_destroy_qp(qp);
 
+err_create:
+	ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+			   RDMACG_RESOURCE_HCA_OBJECT);
+
 err_put:
 	if (xrcd)
 		put_xrcd_read(xrcd_uobj);
@@ -2518,6 +2577,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
 	if (obj->uxrcd)
 		atomic_dec(&obj->uxrcd->refcnt);
@@ -2969,11 +3030,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	memset(&attr.dmac, 0, sizeof(attr.dmac));
 	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+				   RDMACG_RESOURCE_HCA_OBJECT);
+	if (ret)
+		goto err_charge;
+
 	ah = pd->device->create_ah(pd, &attr, &udata);
 
 	if (IS_ERR(ah)) {
 		ret = PTR_ERR(ah);
-		goto err_put;
+		goto err_create;
 	}
 
 	ah->device = pd->device;
@@ -3012,7 +3078,10 @@ err_copy:
 err_destroy:
 	ib_destroy_ah(ah);
 
-err_put:
+err_create:
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
 	put_pd_read(pd);
 
 err:
@@ -3046,6 +3115,8 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
 	idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -3143,6 +3214,25 @@ out_put:
 	return ret ? ret : in_len;
 }
 
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+				       union ib_flow_spec *ib_spec)
+{
+	ib_spec->type = kern_spec->type;
+	switch (ib_spec->type) {
+	case IB_FLOW_SPEC_ACTION_TAG:
+		if (kern_spec->flow_tag.size !=
+		    sizeof(struct ib_uverbs_flow_spec_action_tag))
+			return -EINVAL;
+
+		ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+		ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
 {
 	/* Returns user space filter size, includes padding */
@@ -3167,8 +3257,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
 	return kern_filter_size;
 }
 
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
-				union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+				       union ib_flow_spec *ib_spec)
 {
 	ssize_t actual_filter_sz;
 	ssize_t kern_filter_sz;
@@ -3263,6 +3353,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 	return 0;
 }
 
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+				union ib_flow_spec *ib_spec)
+{
+	if (kern_spec->reserved)
+		return -EINVAL;
+
+	if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+		return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+	else
+		return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
@@ -3325,6 +3427,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	wq_init_attr.wq_context = file;
 	wq_init_attr.wq_type = cmd.wq_type;
 	wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+	if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+			     sizeof(cmd.create_flags)))
+		wq_init_attr.create_flags = cmd.create_flags;
 	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3585,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 	if (!cmd.attr_mask)
 		return -EINVAL;
 
-	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
 		return -EINVAL;
 
 	wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3594,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 
 	wq_attr.curr_wq_state = cmd.curr_wq_state;
 	wq_attr.wq_state = cmd.wq_state;
+	if (cmd.attr_mask & IB_WQ_FLAGS) {
+		wq_attr.flags = cmd.flags;
+		wq_attr.flags_mask = cmd.flags_mask;
+	}
 	ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
 	put_wq_read(wq);
 	return ret;
@@ -3822,10 +3931,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		err = -EINVAL;
 		goto err_free;
 	}
+
+	err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+				   RDMACG_RESOURCE_HCA_OBJECT);
+	if (err)
+		goto err_free;
+
 	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
 	if (IS_ERR(flow_id)) {
 		err = PTR_ERR(flow_id);
-		goto err_free;
+		goto err_create;
 	}
 	flow_id->uobject = uobj;
 	uobj->object = flow_id;
@@ -3858,6 +3973,8 @@ err_copy:
 	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
 	ib_destroy_flow(flow_id);
+err_create:
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
 err_free:
 	kfree(flow_attr);
 err_put:
@@ -3897,8 +4014,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	flow_id = uobj->object;
 
 	ret = ib_destroy_flow(flow_id);
-	if (!ret)
+	if (!ret) {
+		ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		uobj->live = 0;
+	}
 
 	put_uobj_write(uobj);
@@ -3966,6 +4086,11 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
+	ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
+				   RDMACG_RESOURCE_HCA_OBJECT);
+	if (ret)
+		goto err_put_cq;
+
 	srq = pd->device->create_srq(pd, &attr, udata);
 	if (IS_ERR(srq)) {
 		ret = PTR_ERR(srq);
@@ -4030,6 +4155,8 @@ err_destroy:
 	ib_destroy_srq(srq);
 
 err_put:
+	ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
+			   RDMACG_RESOURCE_HCA_OBJECT);
 	put_pd_read(pd);
 
 err_put_cq:
@@ -4216,6 +4343,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
 	if (srq_type == IB_SRQT_XRC) {
 		us = container_of(obj, struct ib_usrq_object, uevent);
 		atomic_dec(&us->uxrcd->refcnt);
@@ -4323,6 +4452,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 	resp.max_wq_type_rq = attr.max_wq_type_rq;
 	resp.response_length += sizeof(resp.max_wq_type_rq);
+
+	if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+		goto end;
+
+	resp.raw_packet_caps = attr.raw_packet_caps;
+	resp.response_length += sizeof(resp.raw_packet_caps);
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
 	return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..35c788a32e26 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,7 @@
 #include <rdma/ib.h>
 
 #include "uverbs.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -237,6 +238,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 		ib_destroy_ah(ah);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		kfree(uobj);
 	}
 
@@ -246,6 +249,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 		uverbs_dealloc_mw(mw);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		kfree(uobj);
 	}
 
@@ -254,6 +259,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 		ib_destroy_flow(flow_id);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		kfree(uobj);
 	}
 
@@ -266,6 +273,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		if (qp == qp->real_qp)
 			ib_uverbs_detach_umcast(qp, uqp);
 		ib_destroy_qp(qp);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		ib_uverbs_release_uevent(file, &uqp->uevent);
 		kfree(uqp);
 	}
@@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 		ib_destroy_srq(srq);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		ib_uverbs_release_uevent(file, uevent);
 		kfree(uevent);
 	}
@@ -310,6 +321,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 		ib_destroy_cq(cq);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		ib_uverbs_release_ucq(file, ev_file, ucq);
 		kfree(ucq);
 	}
@@ -319,6 +332,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 		ib_dereg_mr(mr);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		kfree(uobj);
 	}
 
@@ -339,11 +354,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 		ib_dealloc_pd(pd);
+		ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+				   RDMACG_RESOURCE_HCA_OBJECT);
 		kfree(uobj);
 	}
 
 	put_pid(context->tgid);
 
+	ib_rdmacg_uncharge(&context->cg_obj, context->device,
+			   RDMACG_RESOURCE_HCA_HANDLE);
+
 	return context->device->dealloc_ucontext(context);
 }
 
@@ -1174,7 +1194,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (cdev_add(&uverbs_dev->cdev, base, 1))
 		goto err_cdev;
 
-	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+	uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
 					uverbs_dev->cdev.dev, uverbs_dev,
 					"uverbs%d", uverbs_dev->devnum);
 	if (IS_ERR(uverbs_dev->dev))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..85ed5051fdfd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1205,8 +1205,7 @@ int ib_resolve_eth_dmac(struct ib_device *device,
 {
 	int ret = 0;
 
-	if (ah_attr->port_num < rdma_start_port(device) ||
-	    ah_attr->port_num > rdma_end_port(device))
+	if (!rdma_is_port_valid(device, ah_attr->port_num))
 		return -EINVAL;
 
 	if (!rdma_cap_eth_ah(device, ah_attr->port_num))
@@ -1949,17 +1948,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
  */
 static void __ib_drain_sq(struct ib_qp *qp)
 {
+	struct ib_cq *cq = qp->send_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct ib_drain_cqe sdrain;
 	struct ib_send_wr swr = {}, *bad_swr;
 	int ret;
 
-	if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
-		WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
-			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
-		return;
-	}
-
 	swr.wr_cqe = &sdrain.cqe;
 	sdrain.cqe.done = ib_drain_qp_done;
 	init_completion(&sdrain.done);
@@ -1976,7 +1970,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
 		return;
 	}
 
-	wait_for_completion(&sdrain.done);
+	if (cq->poll_ctx == IB_POLL_DIRECT)
+		while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+	else
+		wait_for_completion(&sdrain.done);
 }
 
 /*
@@ -1984,17 +1982,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
  */
 static void __ib_drain_rq(struct ib_qp *qp)
 {
+	struct ib_cq *cq = qp->recv_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct ib_drain_cqe rdrain;
 	struct ib_recv_wr rwr = {}, *bad_rwr;
 	int ret;
 
-	if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
-		WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
-			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
-		return;
-	}
-
 	rwr.wr_cqe = &rdrain.cqe;
 	rdrain.cqe.done = ib_drain_qp_done;
 	init_completion(&rdrain.done);
@@ -2011,7 +2004,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
 		return;
 	}
 
-	wait_for_completion(&rdrain.done);
+	if (cq->poll_ctx == IB_POLL_DIRECT)
+		while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+	else
+		wait_for_completion(&rdrain.done);
 }
 
 /**
@@ -2028,8 +2025,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
  * ensure there is room in the CQ and SQ for the drain work request and
  * completion.
  *
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
  *
  * ensure that there are no other contexts that are posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
@@ -2057,8 +2053,7 @@ EXPORT_SYMBOL(ib_drain_sq);
  * ensure there is room in the CQ and RQ for the drain work request and
  * completion.
  *
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
  *
  * ensure that there are no other contexts that are posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
@@ -2082,8 +2077,7 @@ EXPORT_SYMBOL(ib_drain_rq);
  * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
  * and completions.
  *
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
  *
  * ensure that there are no other contexts that are posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
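
With the drain rework above, __ib_drain_sq() and __ib_drain_rq() poll an IB_POLL_DIRECT CQ themselves, so ib_drain_qp() can now be used on QPs whose CQs are polled directly, provided the usual rules in the kernel-doc hold (room for the drain WRs and completions, no concurrent WR posters). The sketch below shows how a consumer might rely on that; it is illustrative only and not part of the patch, and names such as my_setup(), my_teardown() and MY_*_DEPTH are hypothetical.

/*
 * Illustrative sketch only (not part of the patch): create and tear down
 * an RC QP whose send/recv CQ uses IB_POLL_DIRECT, relying on the drain
 * support added above.  my_setup(), my_teardown() and MY_*_DEPTH are
 * hypothetical names.
 */
#include <linux/err.h>
#include <rdma/ib_verbs.h>

#define MY_SQ_DEPTH	64
#define MY_RQ_DEPTH	64

static struct ib_qp *my_setup(struct ib_pd *pd, struct ib_cq **cq_out)
{
	struct ib_qp_init_attr attr = {};
	struct ib_cq *cq;
	struct ib_qp *qp;

	/* Leave room for the drain work requests and their completions. */
	cq = ib_alloc_cq(pd->device, NULL, MY_SQ_DEPTH + MY_RQ_DEPTH + 2,
			 0, IB_POLL_DIRECT);
	if (IS_ERR(cq))
		return ERR_CAST(cq);

	attr.send_cq = cq;
	attr.recv_cq = cq;
	attr.cap.max_send_wr = MY_SQ_DEPTH + 1;	/* +1 for the drain WR */
	attr.cap.max_recv_wr = MY_RQ_DEPTH + 1;	/* +1 for the drain WR */
	attr.cap.max_send_sge = 1;
	attr.cap.max_recv_sge = 1;
	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	attr.qp_type = IB_QPT_RC;

	qp = ib_create_qp(pd, &attr);
	if (IS_ERR(qp)) {
		ib_free_cq(cq);
		return qp;
	}

	*cq_out = cq;
	return qp;
}

static void my_teardown(struct ib_qp *qp, struct ib_cq *cq)
{
	/*
	 * ib_drain_qp() moves the QP to the error state and posts marker
	 * send and recv WRs; with the change above it polls an
	 * IB_POLL_DIRECT CQ via ib_process_cq_direct() until both marker
	 * completions arrive.  No other context may post WRs meanwhile.
	 */
	ib_drain_qp(qp);
	ib_destroy_qp(qp);
	ib_free_cq(cq);
}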