diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2020-11-18 14:33:23 +0100 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2020-11-25 16:07:01 +0100 |
commit | dd37d2f59eb839d51b988f6668ce5f0d533b23fd (patch) | |
tree | e304d513f07dd8061673ba680ebaa89865b995c6 /drivers/infiniband/core | |
parent | RDMA/hns: Refactor the hns_roce_buf allocation flow (diff) | |
download | linux-dd37d2f59eb839d51b988f6668ce5f0d533b23fd.tar.xz linux-dd37d2f59eb839d51b988f6668ce5f0d533b23fd.zip |
RDMA/cma: Fix deadlock on &lock in rdma_cma_listen_on_all() error unwind
rdma_detroy_id() cannot be called under &lock - we must instead keep the
error'd ID around until &lock can be released, then destroy it.
This is complicated by the usual way listen IDs are destroyed through
cma_process_remove() which can run at any time and will asynchronously
destroy the same ID.
Remove the ID from visiblity of cma_process_remove() before going down the
destroy path outside the locking.
Fixes: c80a0c52d85c ("RDMA/cma: Add missing error handling of listen_id")
Link: https://lore.kernel.org/r/20201118133756.GK244516@ziepe.ca
Reported-by: syzbot+1bc48bf7f78253f664a9@syzkaller.appspotmail.com
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/cma.c | 25 |
1 files changed, 18 insertions, 7 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 4585f654f883..c06c87a4dc5e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2496,7 +2496,8 @@ static int cma_listen_handler(struct rdma_cm_id *id, } static int cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,6 +2505,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return 0; @@ -2518,7 +2520,6 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; @@ -2528,25 +2529,31 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); return 0; err_listen: - list_del(&id_priv->listen_list); + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); - rdma_destroy_id(&dev_id_priv->id); return ret; } static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { - ret = cma_listen_on_dev(id_priv, cma_dev); - if (ret) + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); goto err_listen; + } } mutex_unlock(&lock); return 0; @@ -2554,6 +2561,8 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) err_listen: list_del(&id_priv->list); mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); return ret; } @@ -4855,6 +4864,7 @@ static void cma_process_remove(struct cma_device *cma_dev) static int cma_add_one(struct ib_device *device) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; @@ -4902,7 +4912,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) { - ret = cma_listen_on_dev(id_priv, cma_dev); + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; } @@ -4915,6 +4925,7 @@ free_listen: list_del(&cma_dev->list); mutex_unlock(&lock); + /* cma_process_remove() will delete to_destroy */ cma_process_remove(cma_dev); kfree(cma_dev->default_roce_tos); free_gid_type: |