summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-mpath.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-mpath.c')
-rw-r--r--drivers/md/dm-mpath.c297
1 files changed, 187 insertions, 110 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ef57c6d1c887..7d3e572072f5 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -64,36 +64,30 @@ struct priority_group {
/* Multipath context */
struct multipath {
- struct list_head list;
- struct dm_target *ti;
-
- const char *hw_handler_name;
- char *hw_handler_params;
+ unsigned long flags; /* Multipath state flags */
spinlock_t lock;
-
- unsigned nr_priority_groups;
- struct list_head priority_groups;
-
- wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+ enum dm_queue_mode queue_mode;
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
- unsigned long flags; /* Multipath state flags */
+ atomic_t nr_valid_paths; /* Total number of usable paths */
+ unsigned nr_priority_groups;
+ struct list_head priority_groups;
+ const char *hw_handler_name;
+ char *hw_handler_params;
+ wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
-
- atomic_t nr_valid_paths; /* Total number of usable paths */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
atomic_t pg_init_count; /* Number of times pg_init called */
- enum dm_queue_mode queue_mode;
-
struct mutex work_mutex;
struct work_struct trigger_event;
+ struct dm_target *ti;
struct work_struct process_queued_bios;
struct bio_list queued_bios;
@@ -135,10 +129,10 @@ static struct pgpath *alloc_pgpath(void)
{
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
- if (pgpath) {
- pgpath->is_active = true;
- INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work);
- }
+ if (!pgpath)
+ return NULL;
+
+ pgpath->is_active = true;
return pgpath;
}
@@ -193,13 +187,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
spin_lock_init(&m->lock);
- set_bit(MPATHF_QUEUE_IO, &m->flags);
atomic_set(&m->nr_valid_paths, 0);
- atomic_set(&m->pg_init_in_progress, 0);
- atomic_set(&m->pg_init_count, 0);
- m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
INIT_WORK(&m->trigger_event, trigger_event);
- init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
m->queue_mode = DM_TYPE_NONE;
@@ -221,13 +210,26 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+ }
+ }
+
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ set_bit(MPATHF_QUEUE_IO, &m->flags);
+ atomic_set(&m->pg_init_in_progress, 0);
+ atomic_set(&m->pg_init_count, 0);
+ m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+ init_waitqueue_head(&m->pg_init_wait);
}
dm_table_set_type(ti->table, m->queue_mode);
@@ -246,6 +248,7 @@ static void free_multipath(struct multipath *m)
kfree(m->hw_handler_name);
kfree(m->hw_handler_params);
+ mutex_destroy(&m->work_mutex);
kfree(m);
}
@@ -264,29 +267,23 @@ static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
return dm_per_bio_data(bio, multipath_per_bio_data_size());
}
-static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
+static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio)
{
/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
- struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
void *bio_details = mpio + 1;
-
return bio_details;
}
-static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
- struct dm_bio_details **bio_details_p)
+static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
{
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
- struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
+ struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
- memset(mpio, 0, sizeof(*mpio));
- memset(bio_details, 0, sizeof(*bio_details));
- dm_bio_record(bio_details, bio);
+ mpio->nr_bytes = bio->bi_iter.bi_size;
+ mpio->pgpath = NULL;
+ *mpio_p = mpio;
- if (mpio_p)
- *mpio_p = mpio;
- if (bio_details_p)
- *bio_details_p = bio_details;
+ dm_bio_record(bio_details, bio);
}
/*-----------------------------------------------
@@ -340,6 +337,9 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
+ if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ return;
+
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
@@ -385,7 +385,8 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
- clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
+ clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
@@ -516,12 +517,10 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
return DM_MAPIO_KILL;
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
- if (pg_init_all_paths(m))
- return DM_MAPIO_DELAY_REQUEUE;
- return DM_MAPIO_REQUEUE;
+ pg_init_all_paths(m);
+ return DM_MAPIO_DELAY_REQUEUE;
}
- memset(mpio, 0, sizeof(*mpio));
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
@@ -530,12 +529,23 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
- bool queue_dying = blk_queue_dying(q);
- if (queue_dying) {
+ if (blk_queue_dying(q)) {
atomic_inc(&m->pg_init_in_progress);
activate_or_offline_path(pgpath);
+ return DM_MAPIO_DELAY_REQUEUE;
}
- return DM_MAPIO_DELAY_REQUEUE;
+
+ /*
+ * blk-mq's SCHED_RESTART can cover this requeue, so we
+ * needn't deal with it by DELAY_REQUEUE. More importantly,
+ * we have to return DM_MAPIO_REQUEUE so that blk-mq can
+ * get the queue busy feedback (via BLK_STS_RESOURCE),
+ * otherwise I/O merging can suffer.
+ */
+ if (q->mq_ops)
+ return DM_MAPIO_REQUEUE;
+ else
+ return DM_MAPIO_DELAY_REQUEUE;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
@@ -557,9 +567,9 @@ static void multipath_release_clone(struct request *clone)
/*
* Map cloned bios (bio-based multipath)
*/
-static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
+
+static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
- size_t nr_bytes = bio->bi_iter.bi_size;
struct pgpath *pgpath;
unsigned long flags;
bool queue_io;
@@ -568,7 +578,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
pgpath = READ_ONCE(m->current_pgpath);
queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
if (!pgpath || !queue_io)
- pgpath = choose_pgpath(m, nr_bytes);
+ pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if ((pgpath && queue_io) ||
(!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
@@ -576,14 +586,62 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
spin_lock_irqsave(&m->lock, flags);
bio_list_add(&m->queued_bios, bio);
spin_unlock_irqrestore(&m->lock, flags);
+
/* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
pg_init_all_paths(m);
else if (!queue_io)
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_MAPIO_SUBMITTED;
+
+ return ERR_PTR(-EAGAIN);
}
+ return pgpath;
+}
+
+static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+{
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ /* Do we need to select a new pgpath? */
+ /*
+ * FIXME: currently only switching path if no path (due to failure, etc)
+ * - which negates the point of using a path selector
+ */
+ pgpath = READ_ONCE(m->current_pgpath);
+ if (!pgpath)
+ pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
+
+ if (!pgpath) {
+ if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+ /* Queue for the daemon to resubmit */
+ spin_lock_irqsave(&m->lock, flags);
+ bio_list_add(&m->queued_bios, bio);
+ spin_unlock_irqrestore(&m->lock, flags);
+ queue_work(kmultipathd, &m->process_queued_bios);
+
+ return ERR_PTR(-EAGAIN);
+ }
+ return NULL;
+ }
+
+ return pgpath;
+}
+
+static int __multipath_map_bio(struct multipath *m, struct bio *bio,
+ struct dm_mpath_io *mpio)
+{
+ struct pgpath *pgpath;
+
+ if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ pgpath = __map_bio_nvme(m, bio);
+ else
+ pgpath = __map_bio(m, bio);
+
+ if (IS_ERR(pgpath))
+ return DM_MAPIO_SUBMITTED;
+
if (!pgpath) {
if (must_push_back_bio(m))
return DM_MAPIO_REQUEUE;
@@ -592,7 +650,6 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
}
mpio->pgpath = pgpath;
- mpio->nr_bytes = nr_bytes;
bio->bi_status = 0;
bio_set_dev(bio, pgpath->path.dev->bdev);
@@ -601,7 +658,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
if (pgpath->pg->ps.type->start_io)
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
&pgpath->path,
- nr_bytes);
+ mpio->nr_bytes);
return DM_MAPIO_REMAPPED;
}
@@ -610,8 +667,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = NULL;
- multipath_init_per_bio_data(bio, &mpio, NULL);
-
+ multipath_init_per_bio_data(bio, &mpio);
return __multipath_map_bio(m, bio, mpio);
}
@@ -619,7 +675,8 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
- else if (m->queue_mode == DM_TYPE_BIO_BASED)
+ else if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -649,7 +706,9 @@ static void process_queued_bios(struct work_struct *work)
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
- r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+ dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
+ r = __multipath_map_bio(m, bio, mpio);
switch (r) {
case DM_MAPIO_KILL:
bio->bi_status = BLK_STS_IOERR;
@@ -752,34 +811,11 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
return 0;
}
-static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
- struct dm_target *ti)
+static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error)
{
- int r;
- struct pgpath *p;
- struct multipath *m = ti->private;
- struct request_queue *q = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
const char *attached_handler_name;
-
- /* we need at least a path arg */
- if (as->argc < 1) {
- ti->error = "no device given";
- return ERR_PTR(-EINVAL);
- }
-
- p = alloc_pgpath();
- if (!p)
- return ERR_PTR(-ENOMEM);
-
- r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
- &p->path.dev);
- if (r) {
- ti->error = "error getting device";
- goto bad;
- }
-
- if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
- q = bdev_get_queue(p->path.dev->bdev);
+ int r;
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
retain:
@@ -811,26 +847,59 @@ retain:
char b[BDEVNAME_SIZE];
printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
- bdevname(p->path.dev->bdev, b));
+ bdevname(bdev, b));
goto retain;
}
if (r < 0) {
- ti->error = "error attaching hardware handler";
- dm_put_device(ti, p->path.dev);
- goto bad;
+ *error = "error attaching hardware handler";
+ return r;
}
if (m->hw_handler_params) {
r = scsi_dh_set_params(q, m->hw_handler_params);
if (r < 0) {
- ti->error = "unable to set hardware "
- "handler parameters";
- dm_put_device(ti, p->path.dev);
- goto bad;
+ *error = "unable to set hardware handler parameters";
+ return r;
}
}
}
+ return 0;
+}
+
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
+ struct dm_target *ti)
+{
+ int r;
+ struct pgpath *p;
+ struct multipath *m = ti->private;
+
+ /* we need at least a path arg */
+ if (as->argc < 1) {
+ ti->error = "no device given";
+ return ERR_PTR(-EINVAL);
+ }
+
+ p = alloc_pgpath();
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+
+ r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
+ &p->path.dev);
+ if (r) {
+ ti->error = "error getting device";
+ goto bad;
+ }
+
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
+ r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
+ if (r) {
+ dm_put_device(ti, p->path.dev);
+ goto bad;
+ }
+ }
+
r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
if (r) {
dm_put_device(ti, p->path.dev);
@@ -838,7 +907,6 @@ retain:
}
return p;
-
bad:
free_pgpath(p);
return ERR_PTR(r);
@@ -933,7 +1001,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
- if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
@@ -1022,6 +1091,8 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
+ else if (!strcasecmp(queue_mode_name, "nvme"))
+ m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1122,7 +1193,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1151,16 +1222,19 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
- set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
- smp_mb__after_atomic();
+ if (m->hw_handler_name) {
+ set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
+ smp_mb__after_atomic();
+
+ flush_workqueue(kmpath_handlerd);
+ multipath_wait_for_pg_init_completion(m);
+
+ clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
+ smp_mb__after_atomic();
+ }
- flush_workqueue(kmpath_handlerd);
- multipath_wait_for_pg_init_completion(m);
flush_workqueue(kmultipathd);
flush_work(&m->trigger_event);
-
- clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
- smp_mb__after_atomic();
}
static void multipath_dtr(struct dm_target *ti)
@@ -1496,7 +1570,10 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (error && blk_path_error(error)) {
struct multipath *m = ti->private;
- r = DM_ENDIO_REQUEUE;
+ if (error == BLK_STS_RESOURCE)
+ r = DM_ENDIO_DELAY_REQUEUE;
+ else
+ r = DM_ENDIO_REQUEUE;
if (pgpath)
fail_path(pgpath);
@@ -1521,7 +1598,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
}
static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
- blk_status_t *error)
+ blk_status_t *error)
{
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
@@ -1546,9 +1623,6 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
goto done;
}
- /* Queue for the daemon to resubmit */
- dm_bio_restore(get_bio_details_from_bio(clone), clone);
-
spin_lock_irqsave(&m->lock, flags);
bio_list_add(&m->queued_bios, clone);
spin_unlock_irqrestore(&m->lock, flags);
@@ -1656,6 +1730,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
+ case DM_TYPE_NVME_BIO_BASED:
+ DMEMIT("queue_mode nvme ");
+ break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;