From e83068a5faafb8ca65d3b58bd1e1e3959ce1ddce Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 24 May 2016 21:16:51 -0400 Subject: dm mpath: add optional "queue_mode" feature Allow a user to specify an optional feature 'queue_mode ' where may be "bio", "rq" or "mq" -- which corresponds to bio-based, request_fn rq-based, and blk-mq rq-based respectively. If the queue_mode feature isn't specified the default for the "multipath" target is still "rq" but if dm_mod.use_blk_mq is set to Y it'll default to mode "mq". This new queue_mode feature introduces the ability for each multipath device to have its own queue_mode (whereas before this feature all multipath devices effectively had to have the same queue_mode). This commit also goes a long way to eliminate the awkward (ab)use of DM_TYPE_*, the associated filter_md_type() and other relatively fragile and difficult to maintain code. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 149 +++++++++++++++++++++++++++----------------------- drivers/md/dm-rq.c | 19 +++++-- drivers/md/dm-rq.h | 2 +- drivers/md/dm-table.c | 67 +++++++++++++++-------- drivers/md/dm.c | 15 +---- drivers/md/dm.h | 10 +--- 6 files changed, 143 insertions(+), 119 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2d10ff780d84..7eac080fcb18 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -90,6 +90,8 @@ struct multipath { atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ + unsigned queue_mode; + /* * We must use a mempool of dm_mpath_io structs so that we * can resubmit bios on error. @@ -131,7 +133,6 @@ static void process_queued_bios(struct work_struct *work); #define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ -#define MPATHF_BIO_BASED 7 /* Device is bio-based? */ /*----------------------------------------------- * Allocation routines @@ -191,8 +192,7 @@ static void free_priority_group(struct priority_group *pg, kfree(pg); } -static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq, - bool bio_based) +static struct multipath *alloc_multipath(struct dm_target *ti) { struct multipath *m; @@ -210,25 +210,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq, mutex_init(&m->work_mutex); m->mpio_pool = NULL; - if (!use_blk_mq && !bio_based) { - unsigned min_ios = dm_get_reserved_rq_based_ios(); - - m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); - if (!m->mpio_pool) { - kfree(m); - return NULL; - } - } - - if (bio_based) { - INIT_WORK(&m->process_queued_bios, process_queued_bios); - set_bit(MPATHF_BIO_BASED, &m->flags); - /* - * bio-based doesn't support any direct scsi_dh management; - * it just discovers if a scsi_dh is attached. - */ - set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); - } + m->queue_mode = DM_TYPE_NONE; m->ti = ti; ti->private = m; @@ -237,6 +219,39 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq, return m; } +static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) +{ + if (m->queue_mode == DM_TYPE_NONE) { + /* + * Default to request-based. + */ + if (dm_use_blk_mq(dm_table_get_md(ti->table))) + m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; + else + m->queue_mode = DM_TYPE_REQUEST_BASED; + } + + if (m->queue_mode == DM_TYPE_REQUEST_BASED) { + unsigned min_ios = dm_get_reserved_rq_based_ios(); + + m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); + if (!m->mpio_pool) + return -ENOMEM; + } + else if (m->queue_mode == DM_TYPE_BIO_BASED) { + INIT_WORK(&m->process_queued_bios, process_queued_bios); + /* + * bio-based doesn't support any direct scsi_dh management; + * it just discovers if a scsi_dh is attached. + */ + set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); + } + + dm_table_set_type(ti->table, m->queue_mode); + + return 0; +} + static void free_multipath(struct multipath *m) { struct priority_group *pg, *tmp; @@ -653,7 +668,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio) static void process_queued_bios_list(struct multipath *m) { - if (test_bit(MPATHF_BIO_BASED, &m->flags)) + if (m->queue_mode == DM_TYPE_BIO_BASED) queue_work(kmultipathd, &m->process_queued_bios); } @@ -964,7 +979,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) if (!hw_argc) return 0; - if (test_bit(MPATHF_BIO_BASED, &m->flags)) { + if (m->queue_mode == DM_TYPE_BIO_BASED) { dm_consume_args(as, hw_argc); DMERR("bio-based multipath doesn't allow hardware handler args"); return 0; @@ -1005,7 +1020,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 6, "invalid number of feature args"}, + {0, 8, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -1045,6 +1060,24 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "queue_mode") && + (argc >= 1)) { + const char *queue_mode_name = dm_shift_arg(as); + + if (!strcasecmp(queue_mode_name, "bio")) + m->queue_mode = DM_TYPE_BIO_BASED; + else if (!strcasecmp(queue_mode_name, "rq")) + m->queue_mode = DM_TYPE_REQUEST_BASED; + else if (!strcasecmp(queue_mode_name, "mq")) + m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; + else { + ti->error = "Unknown 'queue_mode' requested"; + r = -EINVAL; + } + argc--; + continue; + } + ti->error = "Unrecognised multipath feature request"; r = -EINVAL; } while (argc && !r); @@ -1052,8 +1085,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) return r; } -static int __multipath_ctr(struct dm_target *ti, unsigned int argc, - char **argv, bool bio_based) +static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) { /* target arguments */ static struct dm_arg _args[] = { @@ -1066,12 +1098,11 @@ static int __multipath_ctr(struct dm_target *ti, unsigned int argc, struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; - bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table)); as.argc = argc; as.argv = argv; - m = alloc_multipath(ti, use_blk_mq, bio_based); + m = alloc_multipath(ti); if (!m) { ti->error = "can't allocate multipath"; return -EINVAL; @@ -1081,6 +1112,10 @@ static int __multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; + r = alloc_multipath_stage2(ti, m); + if (r) + goto bad; + r = parse_hw_handler(&as, m); if (r) goto bad; @@ -1130,9 +1165,9 @@ static int __multipath_ctr(struct dm_target *ti, unsigned int argc, ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; - if (bio_based) + if (m->queue_mode == DM_TYPE_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); - else if (use_blk_mq) + else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) ti->per_io_data_size = sizeof(struct dm_mpath_io); return 0; @@ -1142,16 +1177,6 @@ static int __multipath_ctr(struct dm_target *ti, unsigned int argc, return r; } -static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - return __multipath_ctr(ti, argc, argv, false); -} - -static int multipath_bio_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - return __multipath_ctr(ti, argc, argv, true); -} - static void multipath_wait_for_pg_init_completion(struct multipath *m) { DECLARE_WAITQUEUE(wait, current); @@ -1700,7 +1725,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) + (m->pg_init_retries > 0) * 2 + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + - test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)); + test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) + + (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) @@ -1709,6 +1736,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) DMEMIT("retain_attached_hw_handler "); + if (m->queue_mode != DM_TYPE_REQUEST_BASED) { + switch(m->queue_mode) { + case DM_TYPE_BIO_BASED: + DMEMIT("queue_mode bio "); + break; + case DM_TYPE_MQ_REQUEST_BASED: + DMEMIT("queue_mode mq "); + break; + } + } } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1995,7 +2032,7 @@ static int multipath_busy(struct dm_target *ti) *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, .module = THIS_MODULE, .ctr = multipath_ctr, @@ -2004,22 +2041,6 @@ static struct target_type multipath_target = { .clone_and_map_rq = multipath_clone_and_map, .release_clone_rq = multipath_release_clone, .rq_end_io = multipath_end_io, - .presuspend = multipath_presuspend, - .postsuspend = multipath_postsuspend, - .resume = multipath_resume, - .status = multipath_status, - .message = multipath_message, - .prepare_ioctl = multipath_prepare_ioctl, - .iterate_devices = multipath_iterate_devices, - .busy = multipath_busy, -}; - -static struct target_type multipath_bio_target = { - .name = "multipath-bio", - .version = {1, 0, 0}, - .module = THIS_MODULE, - .ctr = multipath_bio_ctr, - .dtr = multipath_dtr, .map = multipath_map_bio, .end_io = multipath_end_io_bio, .presuspend = multipath_presuspend, @@ -2048,13 +2069,6 @@ static int __init dm_multipath_init(void) goto bad_register_target; } - r = dm_register_target(&multipath_bio_target); - if (r < 0) { - DMERR("bio-based register failed %d", r); - r = -EINVAL; - goto bad_register_bio_based_target; - } - kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) { DMERR("failed to create workqueue kmpathd"); @@ -2081,8 +2095,6 @@ static int __init dm_multipath_init(void) bad_alloc_kmpath_handlerd: destroy_workqueue(kmultipathd); bad_alloc_kmultipathd: - dm_unregister_target(&multipath_bio_target); -bad_register_bio_based_target: dm_unregister_target(&multipath_target); bad_register_target: kmem_cache_destroy(_mpio_cache); @@ -2096,7 +2108,6 @@ static void __exit dm_multipath_exit(void) destroy_workqueue(kmultipathd); dm_unregister_target(&multipath_target); - dm_unregister_target(&multipath_bio_target); kmem_cache_destroy(_mpio_cache); } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 787c81b16a26..266f7b674108 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -230,7 +230,14 @@ static void free_rq_clone(struct request *clone) blk_rq_unprep_clone(clone); - if (md->type == DM_TYPE_MQ_REQUEST_BASED) + /* + * It is possible for a clone_old_rq() allocated clone to + * get passed in -- it may not yet have a request_queue. + * This is known to occur if the error target replaces + * a multipath target that has a request_fn queue stacked + * on blk-mq queue(s). + */ + if (clone->q && clone->q->mq_ops) /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); else if (!md->queue->mq_ops) @@ -561,7 +568,7 @@ static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, * Must clone a request if this .request_fn DM device * is stacked on .request_fn device(s). */ - if (!dm_table_mq_request_based(table)) { + if (!dm_table_all_blk_mq_devices(table)) { if (!clone_old_rq(rq, md, tio, gfp_mask)) { dm_put_live_table(md, srcu_idx); free_old_rq_tio(tio); @@ -711,7 +718,7 @@ ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, { unsigned deadline; - if (!dm_request_based(md) || md->use_blk_mq) + if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED) return count; if (kstrtouint(buf, 10, &deadline)) @@ -886,12 +893,13 @@ static struct blk_mq_ops dm_mq_ops = { .init_request = dm_mq_init_request, }; -int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt) +int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) { struct request_queue *q; + struct dm_target *immutable_tgt; int err; - if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) { + if (!dm_table_all_blk_mq_devices(t)) { DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); return -EINVAL; } @@ -908,6 +916,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutab md->tag_set->driver_data = md; md->tag_set->cmd_size = sizeof(struct dm_rq_target_io); + immutable_tgt = dm_table_get_immutable_target(t); if (immutable_tgt && immutable_tgt->per_io_data_size) { /* any target-specific per-io data is immediately after the tio */ md->tag_set->cmd_size += immutable_tgt->per_io_data_size; diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index 1559f6486024..9e6f0a3773d4 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -49,7 +49,7 @@ bool dm_use_blk_mq_default(void); bool dm_use_blk_mq(struct mapped_device *md); int dm_old_init_request_queue(struct mapped_device *md); -int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt); +int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t); void dm_mq_cleanup_mapped_device(struct mapped_device *md); void dm_start_queue(struct request_queue *q); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a682d51111dd..88f01744ac16 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -43,8 +43,10 @@ struct dm_table { struct dm_target *targets; struct target_type *immutable_target_type; - unsigned integrity_supported:1; - unsigned singleton:1; + + bool integrity_supported:1; + bool singleton:1; + bool all_blk_mq:1; /* * Indicates the rw permissions for the new logical @@ -206,6 +208,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, return -ENOMEM; } + t->type = DM_TYPE_NONE; t->mode = mode; t->md = md; *result = t; @@ -703,7 +706,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, dm_device_name(t->md), type); return -EINVAL; } - t->singleton = 1; + t->singleton = true; } if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { @@ -830,16 +833,29 @@ static bool __table_type_request_based(unsigned table_type) table_type == DM_TYPE_MQ_REQUEST_BASED); } -static int dm_table_set_type(struct dm_table *t) +void dm_table_set_type(struct dm_table *t, unsigned type) +{ + t->type = type; +} +EXPORT_SYMBOL_GPL(dm_table_set_type); + +static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - bool use_blk_mq = false; + bool verify_blk_mq = false; struct dm_target *tgt; struct dm_dev_internal *dd; - struct list_head *devices; + struct list_head *devices = dm_table_get_devices(t); unsigned live_md_type = dm_get_md_type(t->md); + if (t->type != DM_TYPE_NONE) { + /* target already set the table's type */ + if (t->type == DM_TYPE_BIO_BASED) + return 0; + goto verify_rq_based; + } + for (i = 0; i < t->num_targets; i++) { tgt = t->targets + i; if (dm_target_hybrid(tgt)) @@ -876,6 +892,19 @@ static int dm_table_set_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ + if (list_empty(devices) && __table_type_request_based(live_md_type)) { + /* inherit live MD type */ + t->type = live_md_type; + return 0; + } + + /* + * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by + * having a compatible target use dm_table_set_type. + */ + t->type = DM_TYPE_REQUEST_BASED; + +verify_rq_based: /* * Request-based dm supports only tables that have a single target now. * To support multiple targets, request splitting support is needed, @@ -888,7 +917,6 @@ static int dm_table_set_type(struct dm_table *t) } /* Non-request-stackable devices can't be used for request-based dm */ - devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); @@ -899,10 +927,10 @@ static int dm_table_set_type(struct dm_table *t) } if (q->mq_ops) - use_blk_mq = true; + verify_blk_mq = true; } - if (use_blk_mq) { + if (verify_blk_mq) { /* verify _all_ devices in the table are blk-mq devices */ list_for_each_entry(dd, devices, list) if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) { @@ -910,14 +938,9 @@ static int dm_table_set_type(struct dm_table *t) " are blk-mq request-stackable"); return -EINVAL; } - t->type = DM_TYPE_MQ_REQUEST_BASED; - } else if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - - } else - t->type = DM_TYPE_REQUEST_BASED; + t->all_blk_mq = true; + } return 0; } @@ -961,9 +984,9 @@ bool dm_table_request_based(struct dm_table *t) return __table_type_request_based(dm_table_get_type(t)); } -bool dm_table_mq_request_based(struct dm_table *t) +bool dm_table_all_blk_mq_devices(struct dm_table *t) { - return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED; + return t->all_blk_mq; } static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) @@ -1106,7 +1129,7 @@ static int dm_table_register_integrity(struct dm_table *t) return 0; if (!integrity_profile_exists(dm_disk(md))) { - t->integrity_supported = 1; + t->integrity_supported = true; /* * Register integrity profile during table load; we can do * this because the final profile must match during resume. @@ -1129,7 +1152,7 @@ static int dm_table_register_integrity(struct dm_table *t) } /* Preserve existing integrity profile */ - t->integrity_supported = 1; + t->integrity_supported = true; return 0; } @@ -1141,9 +1164,9 @@ int dm_table_complete(struct dm_table *t) { int r; - r = dm_table_set_type(t); + r = dm_table_determine_type(t); if (r) { - DMERR("unable to set table type"); + DMERR("unable to determine table type"); return r; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8f22527134e9..2c907bc10fe9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1738,23 +1738,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); -static unsigned filter_md_type(unsigned type, struct mapped_device *md) -{ - if (type == DM_TYPE_BIO_BASED) - return type; - - return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; -} - /* * Setup the DM device's queue based on md's type */ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { int r; - unsigned md_type = filter_md_type(dm_get_md_type(md), md); - switch (md_type) { + switch (dm_get_md_type(md)) { case DM_TYPE_REQUEST_BASED: r = dm_old_init_request_queue(md); if (r) { @@ -1763,7 +1754,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) } break; case DM_TYPE_MQ_REQUEST_BASED: - r = dm_mq_init_request_queue(md, dm_table_get_immutable_target(t)); + r = dm_mq_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based dm-mq mapped device"); return r; @@ -2472,8 +2463,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t if (!pools) return NULL; - type = filter_md_type(type, md); - switch (type) { case DM_TYPE_BIO_BASED: cachep = _io_cache; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b611b3064a7c..2e0e4a53a312 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -33,14 +33,6 @@ */ #define DM_STATUS_NOFLUSH_FLAG (1 << 0) -/* - * Type of table and mapped_device's mempool - */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 -#define DM_TYPE_MQ_REQUEST_BASED 3 - /* * List of devices that a metadevice uses and should open/close. */ @@ -77,7 +69,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); struct dm_target *dm_table_get_immutable_target(struct dm_table *t); struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); -bool dm_table_mq_request_based(struct dm_table *t); +bool dm_table_all_blk_mq_devices(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -- cgit v1.2.3