diff options
author | Leon Romanovsky <leonro@mellanox.com> | 2019-02-18 21:25:47 +0100 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2019-02-19 05:04:36 +0100 |
commit | 41eda65c6100930d95bb854a0114f3544593070c (patch) | |
tree | 64ae996d521909ccb72a35ba4a5485cfcc125aa9 | |
parent | RDMA/restrack: Reduce scope of synchronization lock while updating DB (diff) | |
download | linux-41eda65c6100930d95bb854a0114f3544593070c.tar.xz linux-41eda65c6100930d95bb854a0114f3544593070c.zip |
RDMA/restrack: Hide restrack DB from IB/core
There is no need to expose internals of restrack DB to IB/core.
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r-- | drivers/infiniband/core/device.c | 6 |
-rw-r--r-- | drivers/infiniband/core/nldev.c | 17 |
-rw-r--r-- | drivers/infiniband/core/restrack.c | 83 |
-rw-r--r-- | drivers/infiniband/core/restrack.h | 39 |
-rw-r--r-- | include/rdma/ib_verbs.h | 7 |
-rw-r--r-- | include/rdma/restrack.h | 28 |
6 files changed, 114 insertions, 66 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3eddc6e67a16..f7e206033d39 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -45,6 +45,7 @@ #include <rdma/ib_cache.h> #include "core_priv.h" +#include "restrack.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); @@ -338,7 +339,10 @@ struct ib_device *_ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(device); + if (rdma_restrack_init(device)) { + kfree(device); + return NULL; + } device->dev.class = &ib_class; device->groups[0] = &ib_dev_attr_group; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 0cd95f80f7b4..54312f9626a1 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -39,6 +39,7 @@ #include "core_priv.h" #include "cma_priv.h" +#include "restrack.h" static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, @@ -1027,6 +1028,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, unsigned long id; u32 index, port = 0; bool filled = false; + struct xarray *xa; err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); @@ -1074,13 +1076,14 @@ static int res_get_common_dumpit(struct sk_buff *skb, has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); - down_read(&device->res.rwsem); + xa = &device->res->xa[res_type]; + down_read(&device->res->rwsem); /* * FIXME: if the skip ahead is something common this loop should * use xas_for_each & xas_pause to optimize, we can have a lot of * objects. 
*/ - xa_for_each(&device->res.xa[res_type], id, res) { + xa_for_each(xa, id, res) { if (idx < start) goto next; @@ -1101,13 +1104,13 @@ static int res_get_common_dumpit(struct sk_buff *skb, if (!entry_attr) { ret = -EMSGSIZE; rdma_restrack_put(res); - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); break; } - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); - down_read(&device->res.rwsem); + down_read(&device->res->rwsem); /* * Return resource back, but it won't be released till * the &device->res.rwsem will be released for write. @@ -1125,7 +1128,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, nla_nest_end(skb, entry_attr); next: idx++; } - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); nla_nest_end(skb, table_attr); nlmsg_end(skb, nlh); @@ -1143,7 +1146,7 @@ next: idx++; res_err: nla_nest_cancel(skb, table_attr); - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); err: nlmsg_cancel(skb, nlh); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 076ef6475df8..6a4b76c66bcb 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -9,8 +9,10 @@ #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/pid_namespace.h> +#include <linux/rwsem.h> #include "cma_priv.h" +#include "restrack.h" static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, u32 *next) @@ -35,18 +37,27 @@ static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, } /** - * rdma_restrack_init() - initialize resource tracking + * rdma_restrack_init() - initialize and allocate resource tracking * @dev: IB device + * + * Return: 0 on success */ -void rdma_restrack_init(struct ib_device *dev) +int rdma_restrack_init(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root *rt; int i; + dev->res = kzalloc(sizeof(*rt), GFP_KERNEL); + 
if (!dev->res) + return -ENOMEM; + + rt = dev->res; + for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) - xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC); + xa_init_flags(&rt->xa[i], XA_FLAGS_ALLOC); + init_rwsem(&rt->rwsem); - init_rwsem(&res->rwsem); + return 0; } static const char *type2str(enum rdma_restrack_type type) @@ -69,7 +80,7 @@ static const char *type2str(enum rdma_restrack_type type) */ void rdma_restrack_clean(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root *rt = dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; bool found = false; @@ -77,14 +88,16 @@ void rdma_restrack_clean(struct ib_device *dev) int i; for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { - if (!xa_empty(&res->xa[i])) { + struct xarray *xa = &dev->res->xa[i]; + + if (!xa_empty(xa)) { unsigned long index; if (!found) { pr_err("restrack: %s", CUT_HERE); dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); } - xa_for_each(&res->xa[i], index, e) { + xa_for_each(xa, index, e) { if (rdma_is_kernel_res(e)) { owner = e->kern_name; } else { @@ -104,10 +117,12 @@ void rdma_restrack_clean(struct ib_device *dev) } found = true; } - xa_destroy(&res->xa[i]); + xa_destroy(xa); } if (found) pr_err("restrack: %s", CUT_HERE); + + kfree(rt); } /** @@ -119,19 +134,19 @@ void rdma_restrack_clean(struct ib_device *dev) int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { - struct rdma_restrack_root *res = &dev->res; + struct xarray *xa = &dev->res->xa[type]; struct rdma_restrack_entry *e; unsigned long index = 0; u32 cnt = 0; - down_read(&res->rwsem); - xa_for_each(&res->xa[type], index, e) { + down_read(&dev->res->rwsem); + xa_for_each(xa, index, e) { if (ns == &init_pid_ns || (!rdma_is_kernel_res(e) && ns == task_active_pid_ns(e->task))) cnt++; } - up_read(&res->rwsem); + up_read(&dev->res->rwsem); return cnt; } EXPORT_SYMBOL(rdma_restrack_count); @@ -202,17 +217,19 @@ 
EXPORT_SYMBOL(rdma_restrack_set_task); static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); + struct rdma_restrack_root *rt; + struct xarray *xa; int ret; if (!dev) return; + rt = dev->res; + xa = &dev->res->xa[res->type]; + kref_init(&res->kref); init_completion(&res->comp); - - ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res, - &dev->res.next_id[res->type]); - + ret = rt_xa_alloc_cyclic(xa, &res->id, res, &rt->next_id[res->type]); if (!ret) res->valid = true; } @@ -266,14 +283,14 @@ struct rdma_restrack_entry * rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id) { - struct rdma_restrack_root *rt = &dev->res; + struct xarray *xa = &dev->res->xa[type]; struct rdma_restrack_entry *res; - down_read(&dev->res.rwsem); - res = xa_load(&rt->xa[type], id); + down_read(&dev->res->rwsem); + res = xa_load(xa, id); if (!res || !rdma_restrack_get(res)) res = ERR_PTR(-ENOENT); - up_read(&dev->res.rwsem); + up_read(&dev->res->rwsem); return res; } @@ -295,19 +312,33 @@ EXPORT_SYMBOL(rdma_restrack_put); void rdma_restrack_del(struct rdma_restrack_entry *res) { - struct ib_device *dev; + struct ib_device *dev = res_to_dev(res); + struct xarray *xa; if (!res->valid) goto out; - dev = res_to_dev(res); + /* + * All objects except CM_ID set valid device immediately + * after new object is created, it means that for not valid + * objects will still have "dev". + * + * It is not the case for CM_ID, newly created object has + * this field set to NULL and it is set in _cma_attach_to_dev() + * only. + * + * Because we don't want to add any conditions on call + * to rdma_restrack_del(), the check below protects from + * NULL-dereference. 
+ */ if (!dev) return; - down_write(&dev->res.rwsem); - xa_erase(&dev->res.xa[res->type], res->id); + xa = &dev->res->xa[res->type]; + down_write(&dev->res->rwsem); + xa_erase(xa, res->id); res->valid = false; - up_write(&dev->res.rwsem); + up_write(&dev->res->rwsem); rdma_restrack_put(res); wait_for_completion(&res->comp); diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h new file mode 100644 index 000000000000..cf89ef0b8ed5 --- /dev/null +++ b/drivers/infiniband/core/restrack.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_CORE_RESTRACK_H_ +#define _RDMA_CORE_RESTRACK_H_ + +#include <linux/mutex.h> +#include <linux/rwsem.h> + +/** + * struct rdma_restrack_root - main resource tracking management + * entity, per-device + */ +struct rdma_restrack_root { + /* + * @rwsem: Read/write lock to protect erase of entry. + * Lists and insertions are protected by XArray internal lock. + */ + struct rw_semaphore rwsem; + /** + * @xa: Array of XArray structures to hold restrack entries. + * We want to use array of XArrays because insertion is type + * dependent. For types with xisiting unique ID (like QPN), + * we will insert to that unique index. For other types, + * we insert based on pointers and auto-allocate unique index. 
+ */ + struct xarray xa[RDMA_RESTRACK_MAX]; + /** + * @next_id: Next ID to support cyclic allocation + */ + u32 next_id[RDMA_RESTRACK_MAX]; +}; + + +int rdma_restrack_init(struct ib_device *dev); +void rdma_restrack_clean(struct ib_device *dev); +#endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 64ee7c08be22..2a17c2b30073 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2533,6 +2533,8 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_pd); }; +struct rdma_restrack_root; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2589,10 +2591,7 @@ struct ib_device { #endif u32 index; - /* - * Implementation details of the RDMA core, don't use in drivers - */ - struct rdma_restrack_root res; + struct rdma_restrack_root *res; const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 53e1a7fb7355..ecf3c7702a4f 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -7,7 +7,6 @@ #define _RDMA_RESTRACK_H_ #include <linux/typecheck.h> -#include <linux/rwsem.h> #include <linux/sched.h> #include <linux/kref.h> #include <linux/completion.h> @@ -50,31 +49,6 @@ enum rdma_restrack_type { }; struct ib_device; -struct rdma_restrack_entry; - -/** - * struct rdma_restrack_root - main resource tracking management - * entity, per-device - */ -struct rdma_restrack_root { - /* - * @rwsem: Read/write lock to protect erase of entry. - * Lists and insertions are protected by XArray internal lock. - */ - struct rw_semaphore rwsem; - /** - * @xa: Array of XArray structures to hold restrack entries. - * We want to use array of XArrays because insertion is type - * dependent. For types with xisiting unique ID (like QPN), - * we will insert to that unique index. For other types, - * we insert based on pointers and auto-allocate unique index. 
- */ - struct xarray xa[RDMA_RESTRACK_MAX]; - /** - * @next_id: Next ID to support cyclic allocation - */ - u32 next_id[RDMA_RESTRACK_MAX]; -}; /** * struct rdma_restrack_entry - metadata per-entry @@ -125,8 +99,6 @@ struct rdma_restrack_entry { u32 id; }; -void rdma_restrack_init(struct ib_device *dev); -void rdma_restrack_clean(struct ib_device *dev); int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns); |