summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c273
1 files changed, 200 insertions, 73 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 58f3927fd7cc..8f37ed215b19 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -19,6 +19,7 @@
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
+#include <linux/wait.h>
#include <trace/events/block.h>
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6
#define DMF_DEFERRED_REMOVE 7
+#define DMF_SUSPENDED_INTERNALLY 8
/*
* A dummy definition to make RCU happy.
@@ -140,7 +142,7 @@ struct mapped_device {
* Use dm_get_live_table{_fast} or take suspend_lock for
* dereference.
*/
- struct dm_table *map;
+ struct dm_table __rcu *map;
struct list_head table_devices;
struct mutex table_devices_lock;
@@ -525,14 +527,15 @@ retry:
goto out;
tgt = dm_table_get_target(map, 0);
+ if (!tgt->type->ioctl)
+ goto out;
if (dm_suspended_md(md)) {
r = -EAGAIN;
goto out;
}
- if (tgt->type->ioctl)
- r = tgt->type->ioctl(tgt, cmd, arg);
+ r = tgt->type->ioctl(tgt, cmd, arg);
out:
dm_put_live_table(md, srcu_idx);
@@ -1607,9 +1610,9 @@ static int dm_merge_bvec(struct request_queue *q,
* Find maximum amount of I/O that won't need splitting
*/
max_sectors = min(max_io_len(bvm->bi_sector, ti),
- (sector_t) BIO_MAX_SECTORS);
+ (sector_t) queue_max_sectors(q));
max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
- if (max_size < 0)
+ if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
max_size = 0;
/*
@@ -1621,10 +1624,10 @@ static int dm_merge_bvec(struct request_queue *q,
max_size = ti->type->merge(ti, bvm, biovec, max_size);
/*
* If the target doesn't support merge method and some of the devices
- * provided their merge_bvec method (we know this by looking at
- * queue_max_hw_sectors), then we can't allow bios with multiple vector
- * entries. So always set max_size to 0, and the code below allows
- * just one page.
+ * provided their merge_bvec method (we know this by looking for the
+ * max_hw_sectors that dm_set_device_limits may set), then we can't
+ * allow bios with multiple vector entries. So always set max_size
+ * to 0, and the code below allows just one page.
*/
else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
max_size = 0;
@@ -2332,7 +2335,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
merge_is_optional = dm_table_merge_is_optional(t);
- old_map = md->map;
+ old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
rcu_assign_pointer(md->map, t);
md->immutable_target_type = dm_table_get_immutable_target_type(t);
@@ -2341,7 +2344,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
else
clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
- dm_sync_table(md);
+ if (old_map)
+ dm_sync_table(md);
return old_map;
}
@@ -2351,7 +2355,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
*/
static struct dm_table *__unbind(struct mapped_device *md)
{
- struct dm_table *map = md->map;
+ struct dm_table *map = rcu_dereference_protected(md->map, 1);
if (!map)
return NULL;
@@ -2716,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
}
/*
- * We need to be able to change a mapping table under a mounted
- * filesystem. For example we might want to move some data in
- * the background. Before the table can be swapped with
- * dm_bind_table, dm_suspend must be called to flush any in
- * flight bios and ensure that any further io gets deferred.
- */
-/*
- * Suspend mechanism in request-based dm.
+ * If __dm_suspend returns 0, the device is completely quiescent
+ * now. There is no request-processing activity. All new requests
+ * are being added to md->deferred list.
*
- * 1. Flush all I/Os by lock_fs() if needed.
- * 2. Stop dispatching any I/O by stopping the request_queue.
- * 3. Wait for all in-flight I/Os to be completed or requeued.
- *
- * To abort suspend, start the request_queue.
+ * Caller must hold md->suspend_lock
*/
-int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+ unsigned suspend_flags, int interruptible)
{
- struct dm_table *map = NULL;
- int r = 0;
- int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
- int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
-
- mutex_lock(&md->suspend_lock);
-
- if (dm_suspended_md(md)) {
- r = -EINVAL;
- goto out_unlock;
- }
-
- map = md->map;
+ bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+ bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+ int r;
/*
* DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2754,7 +2740,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
if (noflush)
set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- /* This does not get reverted if there's an error later. */
+ /*
+ * This gets reverted if there's an error later and the targets
+ * provide the .presuspend_undo hook.
+ */
dm_table_presuspend_targets(map);
/*
@@ -2765,8 +2754,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
*/
if (!noflush && do_lockfs) {
r = lock_fs(md);
- if (r)
- goto out_unlock;
+ if (r) {
+ dm_table_presuspend_undo_targets(map);
+ return r;
+ }
}
/*
@@ -2782,7 +2773,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
* flush_workqueue(md->wq).
*/
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
- synchronize_srcu(&md->io_barrier);
+ if (map)
+ synchronize_srcu(&md->io_barrier);
/*
* Stop md->queue before flushing md->wq in case request-based
@@ -2798,11 +2790,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
* We call dm_wait_for_completion to wait for all existing requests
* to finish.
*/
- r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+ r = dm_wait_for_completion(md, interruptible);
if (noflush)
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- synchronize_srcu(&md->io_barrier);
+ if (map)
+ synchronize_srcu(&md->io_barrier);
/* were we interrupted ? */
if (r < 0) {
@@ -2812,14 +2805,56 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
start_queue(md->queue);
unlock_fs(md);
- goto out_unlock; /* pushback list is already flushed, so skip flush */
+ dm_table_presuspend_undo_targets(map);
+ /* pushback list is already flushed, so skip flush */
}
- /*
- * If dm_wait_for_completion returned 0, the device is completely
- * quiescent now. There is no request-processing activity. All new
- * requests are being added to md->deferred list.
- */
+ return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
+ *
+ * To abort suspend, start the request_queue.
+ */
+int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+{
+ struct dm_table *map = NULL;
+ int r = 0;
+
+retry:
+ mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+ if (dm_suspended_md(md)) {
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (dm_suspended_internally_md(md)) {
+ /* already internally suspended, wait for internal resume */
+ mutex_unlock(&md->suspend_lock);
+ r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ if (r)
+ return r;
+ goto retry;
+ }
+
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+
+ r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+ if (r)
+ goto out_unlock;
set_bit(DMF_SUSPENDED, &md->flags);
@@ -2830,22 +2865,13 @@ out_unlock:
return r;
}
-int dm_resume(struct mapped_device *md)
+static int __dm_resume(struct mapped_device *md, struct dm_table *map)
{
- int r = -EINVAL;
- struct dm_table *map = NULL;
-
- mutex_lock(&md->suspend_lock);
- if (!dm_suspended_md(md))
- goto out;
-
- map = md->map;
- if (!map || !dm_table_get_size(map))
- goto out;
-
- r = dm_table_resume_targets(map);
- if (r)
- goto out;
+ if (map) {
+ int r = dm_table_resume_targets(map);
+ if (r)
+ return r;
+ }
dm_queue_flush(md);
@@ -2859,6 +2885,37 @@ int dm_resume(struct mapped_device *md)
unlock_fs(md);
+ return 0;
+}
+
+int dm_resume(struct mapped_device *md)
+{
+ int r = -EINVAL;
+ struct dm_table *map = NULL;
+
+retry:
+ mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+ if (!dm_suspended_md(md))
+ goto out;
+
+ if (dm_suspended_internally_md(md)) {
+ /* already internally suspended, wait for internal resume */
+ mutex_unlock(&md->suspend_lock);
+ r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ if (r)
+ return r;
+ goto retry;
+ }
+
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ if (!map || !dm_table_get_size(map))
+ goto out;
+
+ r = __dm_resume(md, map);
+ if (r)
+ goto out;
+
clear_bit(DMF_SUSPENDED, &md->flags);
r = 0;
@@ -2872,15 +2929,80 @@ out:
* Internal suspend/resume works like userspace-driven suspend. It waits
* until all bios finish and prevents issuing new bios to the target drivers.
* It may be used only from the kernel.
- *
- * Internal suspend holds md->suspend_lock, which prevents interaction with
- * userspace-driven suspend.
*/
-void dm_internal_suspend(struct mapped_device *md)
+static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
{
- mutex_lock(&md->suspend_lock);
+ struct dm_table *map = NULL;
+
+ if (dm_suspended_internally_md(md))
+ return; /* nested internal suspend */
+
+ if (dm_suspended_md(md)) {
+ set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ return; /* nest suspend */
+ }
+
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+
+ /*
+ * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+ * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
+ * would require changing .presuspend to return an error -- avoid this
+ * until there is a need for more elaborate variants of internal suspend.
+ */
+ (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+
+ set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+
+ dm_table_postsuspend_targets(map);
+}
+
+static void __dm_internal_resume(struct mapped_device *md)
+{
+ if (!dm_suspended_internally_md(md))
+ return; /* resume from nested internal suspend */
+
if (dm_suspended_md(md))
+ goto done; /* resume from nested suspend */
+
+ /*
+ * NOTE: existing callers don't need to call dm_table_resume_targets
+ * (which may fail -- so best to avoid it for now by passing NULL map)
+ */
+ (void) __dm_resume(md, NULL);
+
+done:
+ clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+}
+
+void dm_internal_suspend_noflush(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+ mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+
+void dm_internal_resume(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ __dm_internal_resume(md);
+ mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_resume);
+
+/*
+ * Fast variants of internal suspend/resume hold md->suspend_lock,
+ * which prevents interaction with userspace-driven suspend.
+ */
+
+void dm_internal_suspend_fast(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ if (dm_suspended_md(md) || dm_suspended_internally_md(md))
return;
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2889,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
-void dm_internal_resume(struct mapped_device *md)
+void dm_internal_resume_fast(struct mapped_device *md)
{
- if (dm_suspended_md(md))
+ if (dm_suspended_md(md) || dm_suspended_internally_md(md))
goto done;
dm_queue_flush(md);
@@ -2977,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
return test_bit(DMF_SUSPENDED, &md->flags);
}
+int dm_suspended_internally_md(struct mapped_device *md)
+{
+ return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+}
+
int dm_test_deferred_remove_flag(struct mapped_device *md)
{
return test_bit(DMF_DEFERRED_REMOVE, &md->flags);