diff options
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r-- | drivers/nvme/host/rdma.c | 171 |
1 files changed, 76 insertions, 95 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab545fb347a0..5a8388177959 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -54,7 +54,6 @@ struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct kref ref; struct list_head entry; }; @@ -82,6 +81,8 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@ -291,6 +292,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } req->mr->need_inval = false; @@ -405,10 +407,7 @@ static void nvme_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - if (!register_always) - ib_dereg_mr(ndev->mr); ib_dealloc_pd(ndev->pd); - kfree(ndev); } @@ -441,24 +440,16 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev); + ndev->pd = ib_alloc_pd(ndev->dev, + register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(ndev->pd)) goto out_free_dev; - if (!register_always) { - ndev->mr = ib_get_dma_mr(ndev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(ndev->mr)) - goto out_free_pd; - } - if (!(ndev->dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { dev_err(&ndev->dev->dev, "Memory registrations not supported.\n"); - goto out_free_mr; + goto out_free_pd; } list_add(&ndev->entry, &device_list); @@ -466,9 +457,6 @@ out_unlock: mutex_unlock(&device_list_mutex); return ndev; -out_free_mr: - if (!register_always) - ib_dereg_mr(ndev->mr); out_free_pd: ib_dealloc_pd(ndev->pd); out_free_dev: @@ -480,9 +468,14 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -533,6 +526,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@ -585,11 +579,13 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, goto out_destroy_cm_id; } + clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags); return 0; out_destroy_cm_id: + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@ -608,7 +604,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@ -652,7 +648,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret; @@ -761,8 +757,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -899,7 +900,7 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); - put_unaligned_le32(queue->device->mr->rkey, sg->key); + put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; } @@ -984,7 +985,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, nvme_rdma_queue_idx(queue)) return nvme_rdma_map_sg_inline(queue, req, c); - if (!register_always) + if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) return nvme_rdma_map_sg_single(queue, req, c); } @@ -1305,58 +1306,6 @@ out_destroy_queue_ib: return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret = 0; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* Queue controller deletion */ - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@ -1398,8 +1347,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@ -1531,7 +1480,6 @@ static void nvme_rdma_complete_rq(struct request *rq) static struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_rdma_init_request, .exit_request = nvme_rdma_exit_request, .reinit_request = nvme_rdma_reinit_request, @@ -1543,7 +1491,6 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { static struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_rdma_init_admin_request, .exit_request = nvme_rdma_exit_admin_request, .reinit_request = nvme_rdma_reinit_request, @@ -1700,15 +1647,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret; + int ret = 0; + /* + * Keep a reference until all work is flushed since + * __nvme_rdma_del_ctrl can free the ctrl mem + */ + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; ret = __nvme_rdma_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } static void nvme_rdma_remove_ctrl_work(struct work_struct *work) @@ -2005,27 +1956,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } |