diff options
author | Gioh Kim <gi-oh.kim@cloud.ionos.com> | 2021-04-19 09:37:16 +0200 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2021-04-20 16:59:04 +0200 |
commit | 2958a995edc94654df690318df7b9b49e5a3ef88 (patch) | |
tree | fddb6e4b38e52bfcc2a984940fa0b65e61292252 /drivers/block/rnbd/rnbd-clt.c | |
parent | block/rnbd-clt: Fix missing a memory free when unloading the module (diff) | |
download | linux-2958a995edc94654df690318df7b9b49e5a3ef88.tar.xz linux-2958a995edc94654df690318df7b9b49e5a3ef88.zip |
block/rnbd-clt: Support polling mode for IO latency optimization
RNBD can make double-queues for irq-mode and poll-mode.
For example, on 4-CPU system 8 request-queues are created,
4 for irq-mode and 4 for poll-mode.
If the IO has HIPRI flag, the block-layer will call .poll function
of RNBD. Then IO is sent to the poll-mode queue.
Add optional nr_poll_queues argument for map_devices interface.
To support polling of RNBD, RTRS client creates connections
for both of irq-mode and direct-poll-mode.
For example, on 4-CPU system it could've create 5 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
After this patch, it can create 9 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
con[5:8] => DIRECT-POLL cq
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20210419073722.15351-14-gi-oh.kim@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block/rnbd/rnbd-clt.c')
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 89 |
1 files changed, 81 insertions, 8 deletions
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 9b44aac680d5..ea98124e8ce9 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } +static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx) +{ + struct rnbd_queue *q = hctx->driver_data; + struct rnbd_clt_dev *dev = q->dev; + int cnt; + + cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num); + return cnt; +} + +static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set) +{ + struct rnbd_clt_session *sess = set->driver_data; + + /* shared read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus(); + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus(); + set->map[HCTX_TYPE_READ].queue_offset = 0; + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); + + if (sess->nr_poll_queues) { + /* dedicated queue for poll */ + set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues; + set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset + + set->map[HCTX_TYPE_READ].nr_queues; + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); + pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n", + sess->sessname, + set->map[HCTX_TYPE_DEFAULT].nr_queues, + set->map[HCTX_TYPE_READ].nr_queues, + set->map[HCTX_TYPE_POLL].nr_queues); + } else { + pr_info("[session=%s] mapped %d/%d default/read queues.\n", + sess->sessname, + set->map[HCTX_TYPE_DEFAULT].nr_queues, + set->map[HCTX_TYPE_READ].nr_queues); + } + + return 0; +} + static struct blk_mq_ops rnbd_mq_ops = { .queue_rq = rnbd_queue_rq, .complete = rnbd_softirq_done_fn, + .map_queues = rnbd_rdma_map_queues, + .poll = rnbd_rdma_poll, }; static int setup_mq_tags(struct rnbd_clt_session *sess) @@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_QUEUE_SHARED; tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; - tag_set->nr_hw_queues = num_online_cpus(); + + /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */ + tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2; + /* + * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues + * others are for HCTX_TYPE_POLL + */ + tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues; + tag_set->driver_data = sess; return blk_mq_alloc_tag_set(tag_set); } @@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) static struct rnbd_clt_session * find_and_get_or_create_sess(const char *sessname, const struct rtrs_addr *paths, - size_t path_cnt, u16 port_nr) + size_t path_cnt, u16 port_nr, u32 nr_poll_queues) { struct rnbd_clt_session *sess; struct rtrs_attrs attrs; @@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname, struct rtrs_clt_ops rtrs_ops; sess = find_or_create_sess(sessname, &first); + if (sess == ERR_PTR(-ENOMEM)) + return ERR_PTR(-ENOMEM); + else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { + /* + * A device MUST have its own session to use the polling-mode. + * It must fail to map new device with the same session. + */ + err = -EINVAL; + goto put_sess; + } + if (!first) return sess; @@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname, 0, /* Do not use pdu of rtrs */ RECONNECT_DELAY, BMAX_SEGMENTS, BLK_MAX_SEGMENT_SIZE, - MAX_RECONNECTS); + MAX_RECONNECTS, nr_poll_queues); if (IS_ERR(sess->rtrs)) { err = PTR_ERR(sess->rtrs); goto wake_up_and_put; @@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname, rtrs_clt_query(sess->rtrs, &attrs); sess->max_io_size = attrs.max_io_size; sess->queue_depth = attrs.queue_depth; + sess->nr_poll_queues = nr_poll_queues; err = setup_mq_tags(sess); if (err) @@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev) static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, enum rnbd_access_mode access_mode, - const char *pathname) + const char *pathname, + u32 nr_poll_queues) { struct rnbd_clt_dev *dev; int ret; @@ -1379,7 +1445,12 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, if (!dev) return ERR_PTR(-ENOMEM); - dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues), + /* + * nr_cpu_ids: the number of softirq queues + * nr_poll_queues: the number of polling queues + */ + dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues, + sizeof(*dev->hw_queues), GFP_KERNEL); if (!dev->hw_queues) { ret = -ENOMEM; @@ -1405,6 +1476,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; + dev->nr_poll_queues = nr_poll_queues; mutex_init(&dev->lock); refcount_set(&dev->refcount, 1); dev->dev_state = DEV_STATE_INIT; @@ -1491,7 +1563,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, struct rtrs_addr *paths, size_t path_cnt, u16 port_nr, const char *pathname, - enum rnbd_access_mode access_mode) + enum rnbd_access_mode access_mode, + u32 nr_poll_queues) { struct rnbd_clt_session *sess; struct rnbd_clt_dev *dev; @@ -1500,11 +1573,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, if (unlikely(exists_devpath(pathname, sessname))) return ERR_PTR(-EEXIST); - sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr); + sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues); if (IS_ERR(sess)) return ERR_CAST(sess); - dev = init_dev(sess, access_mode, pathname); + dev = init_dev(sess, access_mode, pathname, nr_poll_queues); if (IS_ERR(dev)) { pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n", pathname, sess->sessname, PTR_ERR(dev)); |