summaryrefslogtreecommitdiffstats
path: root/drivers/block/rnbd/rnbd-clt.c
diff options
context:
space:
mode:
authorGioh Kim <gi-oh.kim@cloud.ionos.com>2021-04-19 09:37:16 +0200
committerJens Axboe <axboe@kernel.dk>2021-04-20 16:59:04 +0200
commit2958a995edc94654df690318df7b9b49e5a3ef88 (patch)
treefddb6e4b38e52bfcc2a984940fa0b65e61292252 /drivers/block/rnbd/rnbd-clt.c
parentblock/rnbd-clt: Fix missing a memory free when unloading the module (diff)
downloadlinux-2958a995edc94654df690318df7b9b49e5a3ef88.tar.xz
linux-2958a995edc94654df690318df7b9b49e5a3ef88.zip
block/rnbd-clt: Support polling mode for IO latency optimization
RNBD can make double-queues for irq-mode and poll-mode. For example, on 4-CPU system 8 request-queues are created, 4 for irq-mode and 4 for poll-mode. If the IO has HIPRI flag, the block-layer will call .poll function of RNBD. Then IO is sent to the poll-mode queue. Add optional nr_poll_queues argument for map_devices interface. To support polling of RNBD, RTRS client creates connections for both irq-mode and direct-poll-mode. For example, on 4-CPU system it could create 5 connections: con[0] => user message (softirq cq) con[1:4] => softirq cq After this patch, it can create 9 connections: con[0] => user message (softirq cq) con[1:4] => softirq cq con[5:8] => DIRECT-POLL cq Cc: Leon Romanovsky <leonro@nvidia.com> Cc: linux-rdma@vger.kernel.org Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com> Signed-off-by: Jack Wang <jinpu.wang@ionos.com> Acked-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Leon Romanovsky <leonro@nvidia.com> Link: https://lore.kernel.org/r/20210419073722.15351-14-gi-oh.kim@ionos.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block/rnbd/rnbd-clt.c')
-rw-r--r--drivers/block/rnbd/rnbd-clt.c89
1 files changed, 81 insertions, 8 deletions
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 9b44aac680d5..ea98124e8ce9 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
+/*
+ * Poll handler installed in rnbd_mq_ops.
+ *
+ * Looks up the RTRS session behind the hardware context's rnbd_queue and
+ * asks rtrs_clt_rdma_cq_direct() to process the queue identified by
+ * hctx->queue_num. The value returned by RTRS is passed back unchanged
+ * (presumably the number of completions handled; confirm against RTRS).
+ */
+static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
+{
+	struct rnbd_queue *rq = hctx->driver_data;
+
+	return rtrs_clt_rdma_cq_direct(rq->dev->sess->rtrs, hctx->queue_num);
+}
+
+/*
+ * Queue-mapping handler installed in rnbd_mq_ops.
+ *
+ * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ are both mapped onto the same
+ * num_online_cpus() hardware queues, starting at offset 0. When
+ * sess->nr_poll_queues is non-zero, HCTX_TYPE_POLL additionally gets that
+ * many queues, placed directly after the READ range.
+ *
+ * The resulting layout is logged once per call. Always returns 0.
+ */
+static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+	struct rnbd_clt_session *sess = set->driver_data;
+
+	/* shared read/write queues */
+	set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
+	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+	set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
+	set->map[HCTX_TYPE_READ].queue_offset = 0;
+	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+	if (sess->nr_poll_queues) {
+		/* dedicated queue for poll */
+		set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
+		set->map[HCTX_TYPE_POLL].queue_offset =
+			set->map[HCTX_TYPE_READ].queue_offset +
+			set->map[HCTX_TYPE_READ].nr_queues;
+		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+		/* %u: the queue counts are unsigned, so do not print them as int */
+		pr_info("[session=%s] mapped %u/%u/%u default/read/poll queues.\n",
+			sess->sessname,
+			set->map[HCTX_TYPE_DEFAULT].nr_queues,
+			set->map[HCTX_TYPE_READ].nr_queues,
+			set->map[HCTX_TYPE_POLL].nr_queues);
+	} else {
+		pr_info("[session=%s] mapped %u/%u default/read queues.\n",
+			sess->sessname,
+			set->map[HCTX_TYPE_DEFAULT].nr_queues,
+			set->map[HCTX_TYPE_READ].nr_queues);
+	}
+
+	return 0;
+}
+
static struct blk_mq_ops rnbd_mq_ops = {
 .queue_rq = rnbd_queue_rq,
 .complete = rnbd_softirq_done_fn,
+ .map_queues = rnbd_rdma_map_queues, /* sets up the default/read (and optional poll) queue maps */
+ .poll = rnbd_rdma_poll, /* forwards to rtrs_clt_rdma_cq_direct() for the hctx's queue */
};
static int setup_mq_tags(struct rnbd_clt_session *sess)
@@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
- tag_set->nr_hw_queues = num_online_cpus();
+
+ /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
+ tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ /*
+ * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
+ * others are for HCTX_TYPE_POLL
+ */
+ tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
+ tag_set->driver_data = sess;
return blk_mq_alloc_tag_set(tag_set);
}
@@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
const struct rtrs_addr *paths,
- size_t path_cnt, u16 port_nr)
+ size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rtrs_attrs attrs;
@@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname,
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
+ if (sess == ERR_PTR(-ENOMEM))
+ return ERR_PTR(-ENOMEM);
+ else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ /*
+ * A device MUST have its own session to use the polling-mode.
+ * It must fail to map new device with the same session.
+ */
+ err = -EINVAL;
+ goto put_sess;
+ }
+
if (!first)
return sess;
@@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname,
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
BLK_MAX_SEGMENT_SIZE,
- MAX_RECONNECTS);
+ MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
@@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname,
rtrs_clt_query(sess->rtrs, &attrs);
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
+ sess->nr_poll_queues = nr_poll_queues;
err = setup_mq_tags(sess);
if (err)
@@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
enum rnbd_access_mode access_mode,
- const char *pathname)
+ const char *pathname,
+ u32 nr_poll_queues)
{
struct rnbd_clt_dev *dev;
int ret;
@@ -1379,7 +1445,12 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
if (!dev)
return ERR_PTR(-ENOMEM);
- dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
+ /*
+ * nr_cpu_ids: the number of softirq queues
+ * nr_poll_queues: the number of polling queues
+ */
+ dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues,
+ sizeof(*dev->hw_queues),
GFP_KERNEL);
if (!dev->hw_queues) {
ret = -ENOMEM;
@@ -1405,6 +1476,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
dev->clt_device_id = ret;
dev->sess = sess;
dev->access_mode = access_mode;
+ dev->nr_poll_queues = nr_poll_queues;
mutex_init(&dev->lock);
refcount_set(&dev->refcount, 1);
dev->dev_state = DEV_STATE_INIT;
@@ -1491,7 +1563,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode)
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
@@ -1500,11 +1573,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
if (unlikely(exists_devpath(pathname, sessname)))
return ERR_PTR(-EEXIST);
- sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
+ sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
if (IS_ERR(sess))
return ERR_CAST(sess);
- dev = init_dev(sess, access_mode, pathname);
+ dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
if (IS_ERR(dev)) {
pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
pathname, sess->sessname, PTR_ERR(dev));