summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile1
-rw-r--r--drivers/infiniband/core/cache.c162
-rw-r--r--drivers/infiniband/core/cgroup.c62
-rw-r--r--drivers/infiniband/core/cm.c2
-rw-r--r--drivers/infiniband/core/cma.c177
-rw-r--r--drivers/infiniband/core/cma_configfs.c42
-rw-r--r--drivers/infiniband/core/core_priv.h33
-rw-r--r--drivers/infiniband/core/cq.c6
-rw-r--r--drivers/infiniband/core/device.c23
-rw-r--r--drivers/infiniband/core/mad.c4
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c28
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/umem.c6
-rw-r--r--drivers/infiniband/core/umem_odp.c94
-rw-r--r--drivers/infiniband/core/umem_rbtree.c21
-rw-r--r--drivers/infiniband/core/user_mad.c4
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c155
-rw-r--r--drivers/infiniband/core/uverbs_main.c22
-rw-r--r--drivers/infiniband/core/verbs.c38
21 files changed, 695 insertions, 190 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..e426ac877d19 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -13,6 +13,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc..b1371eb9f46c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -314,14 +314,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
int empty;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
@@ -369,11 +368,10 @@ out_unlock:
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -399,12 +397,11 @@ out_unlock:
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -428,10 +425,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -455,14 +451,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
unsigned long mask,
u8 *port, u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
+ table = ib_dev->cache.ports[p].gid;
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
@@ -503,18 +498,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
u16 *index)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
+ if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -562,21 +555,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
void *context,
u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
unsigned long flags;
bool found = false;
- if (!ports_table)
- return -EOPNOTSUPP;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
+ if (!rdma_is_port_valid(ib_dev, port) ||
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -668,14 +657,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
@@ -766,71 +754,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
- struct ib_gid_table **table;
+ struct ib_gid_table *table;
int err = 0;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table[port] =
+ table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
+ if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
- table[port]);
+ table);
if (err)
goto rollback_table_setup;
+ ib_dev->cache.ports[port].gid = table;
}
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
+ table);
+ release_gid_table(table);
}
- kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ table);
+ }
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -860,12 +841,12 @@ int ib_get_cached_gid(struct ib_device *device,
{
int res;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ struct ib_gid_table *table;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = device->cache.ports[port_num - rdma_start_port(device)].gid;
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -912,12 +893,12 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -941,12 +922,12 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -981,12 +962,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
int i;
int ret = -ENOENT;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -1010,17 +991,36 @@ int ib_get_cached_lmc(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
+ *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
+int ib_get_cached_port_state(struct ib_device *device,
+ u8 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *port_state = device->cache.ports[port_num
+ - rdma_start_port(device)].port_state;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
static void ib_cache_update(struct ib_device *device,
u8 port)
{
@@ -1033,14 +1033,13 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port))
return;
- table = ports_table[port - rdma_start_port(device)];
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1092,9 +1091,10 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ old_pkey_cache = device->cache.ports[port -
+ rdma_start_port(device)].pkey;
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
+ device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
if (!use_roce_gid_table) {
write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
@@ -1104,7 +1104,9 @@ static void ib_cache_update(struct ib_device *device,
write_unlock(&table->rwlock);
}
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].port_state =
+ tprops->state;
write_unlock_irq(&device->cache.lock);
@@ -1157,22 +1159,17 @@ int ib_cache_setup_one(struct ib_device *device)
rwlock_init(&device->cache.lock);
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
+ device->cache.ports =
+ kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
+ if (!device->cache.ports) {
err = -ENOMEM;
- goto free;
+ goto out;
}
err = gid_table_setup_one(device);
if (err)
- goto free;
+ goto out;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
@@ -1187,9 +1184,7 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
-free:
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+out:
return err;
}
@@ -1203,14 +1198,11 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
+ kfree(device->cache.ports[p].pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+ kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..126ac5f99db7
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success or otherwise failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index cf1edfa1cbac..6535f09dc575 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3409,6 +3409,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
+ pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
+ state, ib_wc_status_msg(wc_status));
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3e70a9c5d79d..acd10d666f1c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -198,6 +198,7 @@ struct cma_device {
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
@@ -269,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
@@ -282,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
@@ -297,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -343,6 +361,7 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
+ bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
@@ -709,6 +728,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
union ib_gid gid, sgid, *dgid;
u16 pkey, index;
u8 p;
+ enum ib_port_state port_state;
int i;
cma_dev = NULL;
@@ -724,6 +744,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
&gid, NULL);
i++) {
@@ -735,7 +757,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
@@ -778,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -1689,6 +1713,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
@@ -1760,6 +1785,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -2266,6 +2293,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
@@ -2285,6 +2313,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
}
queue_work(cma_wq, &work->work);
@@ -2467,14 +2497,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
struct net_device *dev;
prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
@@ -2500,6 +2528,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -2573,7 +2604,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2652,8 +2684,8 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
u16 pkey;
int ret;
u8 p;
@@ -2669,8 +2701,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2720,8 +2752,14 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
+ if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ } else {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2833,20 +2871,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -2962,6 +3006,43 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
@@ -2974,9 +3055,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3205,6 +3296,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
@@ -3244,6 +3336,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
return 0;
err2:
if (id_priv->cma_dev)
@@ -3308,10 +3403,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
@@ -3583,6 +3681,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3760,10 +3861,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ else
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
+ if (!status && id_priv->id.qp) {
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
+ status);
+ }
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
@@ -4229,15 +4337,21 @@ static void cma_add_one(struct ib_device *device)
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
- return;
- }
+ if (!cma_dev->default_gid_type)
+ goto free_cma_dev;
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos)
+ goto free_gid_type;
+
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
init_completion(&cma_dev->comp);
@@ -4250,6 +4364,16 @@ static void cma_add_one(struct ib_device *device)
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+
+ return;
+
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+
+ return;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4318,6 +4442,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..54076a3e8007 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sprintf(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index d29372624f3a..cb7d372e4bdf 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
@@ -62,6 +63,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
+ u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
@@ -121,6 +125,35 @@ int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..e95510117a6d 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -58,8 +58,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
@@ -120,7 +120,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 571974cd3919..593d2ce6ec7c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -333,6 +333,15 @@ int ib_register_device(struct ib_device *device,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ if (!device->dev.dma_ops)
+ device->dev.dma_ops = parent->dma_ops;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask = parent->coherent_dma_mask;
mutex_lock(&device_mutex);
@@ -360,10 +369,18 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+ ib_cache_cleanup_one(device);
+ goto out;
+ }
+
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -372,6 +389,7 @@ int ib_register_device(struct ib_device *device,
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -421,6 +439,7 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
+ ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
@@ -659,7 +678,7 @@ int ib_query_port(struct ib_device *device,
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
@@ -825,7 +844,7 @@ int ib_modify_port(struct ib_device *device,
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a009f7132c73..57f231f1c721 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -316,7 +316,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid port %d\n",
+ port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 0621f4455732..db958d3207ef 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -144,7 +144,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
@@ -152,11 +151,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return 0;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -192,17 +191,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port,
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
- if (rdma_ndev == event_ndev)
+ if (rdma_ndev == cookie)
return 1;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
@@ -379,18 +377,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
@@ -460,7 +454,7 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
u8 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
+ struct net_device *ndev = cookie;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
@@ -519,9 +513,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..daadf3130c9f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1258,7 +1258,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- device->dev.parent = device->dma_device;
+ WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index e0a995b85a2d..cc0d51fb06e3 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1290,7 +1290,7 @@ static void ib_ucm_add_one(struct ib_device *device)
goto err;
ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
+ ucm_dev->dev.parent = device->dev.parent;
ucm_dev->dev.devt = ucm_dev->cdev.dev;
ucm_dev->dev.release = ib_ucm_release_dev;
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..27f155d2df8d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
@@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..cb2742b548bb 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,6 +32,8 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -239,6 +241,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_size = PAGE_SIZE;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
int ret_val;
@@ -270,18 +337,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -466,6 +535,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +575,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that userspace process is being terminated
+ * and mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +624,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
@@ -665,6 +736,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 249b403b43a4..aca7ff7abedc 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1188,7 +1188,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
+ port->dev = device_create(umad_class, device->dev.parent,
port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
@@ -1207,7 +1207,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
- port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_dev = device_create(umad_class, device->dev.parent,
port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
};
};
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..7b7a76e1279a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -316,6 +316,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
+ struct ib_rdmacg_object cg_obj;
int ret;
if (out_len < sizeof resp)
@@ -335,13 +336,18 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
+ goto err;
+
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
- goto err;
+ goto err_alloc;
}
ucontext->device = ib_dev;
+ ucontext->cg_obj = cg_obj;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
@@ -407,6 +413,9 @@ err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
+err_alloc:
+ ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
err:
mutex_unlock(&file->mutex);
return ret;
@@ -561,6 +570,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
return -ENOMEM;
init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret) {
+ kfree(uobj);
+ return ret;
+ }
+
down_write(&uobj->mutex);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
@@ -605,6 +621,7 @@ err_idr:
ib_dealloc_pd(pd);
err:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
put_uobj_write(uobj);
return ret;
}
@@ -637,6 +654,8 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (ret)
goto err_put;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
uobj->live = 0;
put_uobj_write(uobj);
@@ -1006,6 +1025,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1054,6 +1077,9 @@ err_unreg:
ib_dereg_mr(mr);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1178,6 +1204,8 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
mutex_lock(&file->mutex);
@@ -1226,6 +1254,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,6 +1304,9 @@ err_unalloc:
uverbs_dealloc_mw(mw);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1306,6 +1342,8 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
mutex_lock(&file->mutex);
@@ -1405,6 +1443,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
+ ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
cq = ib_dev->create_cq(ib_dev, &attr,
file->ucontext, uhw);
if (IS_ERR(cq)) {
@@ -1452,6 +1495,10 @@ err_free:
ib_destroy_cq(cq);
err_file:
+ ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
@@ -1732,6 +1779,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
mutex_lock(&file->mutex);
@@ -1891,7 +1940,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1904,6 +1954,11 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put;
+
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1911,7 +1966,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_create;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1992,6 +2047,10 @@ err_cb:
err_destroy:
ib_destroy_qp(qp);
+err_create:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
err_put:
if (xrcd)
put_xrcd_read(xrcd_uobj);
@@ -2518,6 +2577,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (obj->uxrcd)
atomic_dec(&obj->uxrcd->refcnt);
@@ -2969,11 +3030,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
ah = pd->device->create_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_put;
+ goto err_create;
}
ah->device = pd->device;
@@ -3012,7 +3078,10 @@ err_copy:
err_destroy:
ib_destroy_ah(ah);
-err_put:
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err:
@@ -3046,6 +3115,8 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
mutex_lock(&file->mutex);
@@ -3143,6 +3214,25 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3167,8 +3257,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3263,6 +3353,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3325,6 +3427,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3585,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3594,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
put_wq_read(wq);
return ret;
@@ -3822,10 +3931,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
+
+ err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (err)
+ goto err_free;
+
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_free;
+ goto err_create;
}
flow_id->uobject = uobj;
uobj->object = flow_id;
@@ -3858,6 +3973,8 @@ err_copy:
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
destroy_flow:
ib_destroy_flow(flow_id);
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
@@ -3897,8 +4014,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
flow_id = uobj->object;
ret = ib_destroy_flow(flow_id);
- if (!ret)
+ if (!ret) {
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
uobj->live = 0;
+ }
put_uobj_write(uobj);
@@ -3966,6 +4086,11 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put_cq;
+
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -4030,6 +4155,8 @@ err_destroy:
ib_destroy_srq(srq);
err_put:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
put_pd_read(pd);
err_put_cq:
@@ -4216,6 +4343,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (srq_type == IB_SRQT_XRC) {
us = container_of(obj, struct ib_usrq_object, uevent);
atomic_dec(&us->uxrcd->refcnt);
@@ -4323,6 +4452,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..35c788a32e26 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,7 @@
#include <rdma/ib.h>
#include "uverbs.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -237,6 +238,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
ib_destroy_ah(ah);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -246,6 +249,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
uverbs_dealloc_mw(mw);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -254,6 +259,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
ib_destroy_flow(flow_id);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -266,6 +273,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
@@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
@@ -310,6 +321,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
ib_destroy_cq(cq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
@@ -319,6 +332,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -339,11 +354,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
ib_dealloc_pd(pd);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
put_pid(context->tgid);
+ ib_rdmacg_uncharge(&context->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
return context->device->dealloc_ucontext(context);
}
@@ -1174,7 +1194,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..85ed5051fdfd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1205,8 +1205,7 @@ int ib_resolve_eth_dmac(struct ib_device *device,
{
int ret = 0;
- if (ah_attr->port_num < rdma_start_port(device) ||
- ah_attr->port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, ah_attr->port_num))
return -EINVAL;
if (!rdma_cap_eth_ah(device, ah_attr->port_num))
@@ -1949,17 +1948,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
@@ -1976,7 +1970,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1984,17 +1982,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
@@ -2011,7 +2004,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
return;
}
- wait_for_completion(&rdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
}
/**
@@ -2028,8 +2025,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2057,8 +2053,7 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2082,8 +2077,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.