author:    Guoqing Jiang <gqjiang@suse.com>  2019-06-19 11:30:46 +0200
committer: Song Liu <songliubraving@fb.com>  2019-06-21 01:35:59 +0200
commit:    3e148a3209792e04f63ec99701235c960765fc9a (patch)
tree:      2661881c5231d9ccf5ae1f92d078572dbdaf6850 /drivers/md/md.c
parent:    f2fs: use block layer helper for show_bio_op macro (diff)
md/raid1: fix potential data inconsistency issue with write behind device
For write-behind mode, a write IO is considered complete once it has reached all the non-write-mostly devices. That works fine for single-queue devices. But for a multiqueue device, when many IOs come in from the upper layer, the write-behind device may issue them to different hardware queues and, depending on each queue's delay, there is no guarantee that they arrive in order.

To address the issue, check for collisions among write-behind IOs: an IO may only proceed when it does not collide with one already in flight; otherwise it must wait for the completion of the previous colliding IO. A new flag, WBCollisionCheck, is introduced to mark a multiqueue device working under write-behind mode.

This patch does not yet handle the cases below, which could hit the same data-inconsistency issue; they will be handled in later patches:

1. modifying max_write_behind via the write backlog node.
2. adding or removing the array's bitmap dynamically.
3. a change of member disk.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
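The collision check itself lives in the raid1 write path, which is outside this md.c-only diff. A minimal sketch of the idea, assuming struct wb_info carries lo/hi sector bounds plus a list_head (matching the mempool element size allocated in md_run() below); the helper name and exact locking here are an illustrative reconstruction, not the verbatim patch:

/*
 * Sketch: before a write-behind bio is issued to a multiqueue rdev,
 * scan the in-flight list for an overlapping sector range. Returns 0
 * and records [lo, hi) when there is no collision, -EBUSY when the
 * caller must wait. Uses the wb_list/wb_list_lock/wb_info_pool fields
 * set up by rdev_init_wb() and md_run() in this diff.
 */
static int check_and_add_wb(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
	struct wb_info *wi, *temp_wi;
	unsigned long flags;
	int ret = 0;
	struct mddev *mddev = rdev->mddev;

	/* a GFP_NOIO mempool allocation waits rather than failing */
	wi = mempool_alloc(mddev->wb_info_pool, GFP_NOIO);

	spin_lock_irqsave(&rdev->wb_list_lock, flags);
	list_for_each_entry(temp_wi, &rdev->wb_list, list) {
		/* do the half-open ranges [lo, hi) overlap? */
		if (hi > temp_wi->lo && lo < temp_wi->hi) {
			ret = -EBUSY;
			break;
		}
	}

	if (!ret) {
		wi->lo = lo;
		wi->hi = hi;
		list_add(&wi->list, &rdev->wb_list);
	} else
		mempool_free(wi, mddev->wb_info_pool);
	spin_unlock_irqrestore(&rdev->wb_list_lock, flags);

	return ret;
}

The write path would then gate each write-behind bio on something like wait_event(rdev->wb_io_wait, check_and_add_wb(rdev, lo, hi) == 0), so a colliding write sleeps until the completion side (sketched after the diff) wakes it.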
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--	drivers/md/md.c	41
1 file changed, 41 insertions(+), 0 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1f37a1adc926..9a9762c83cc8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -124,6 +124,19 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max;
}
+static int rdev_init_wb(struct md_rdev *rdev)
+{
+ if (rdev->bdev->bd_queue->nr_hw_queues == 1)
+ return 0;
+
+ spin_lock_init(&rdev->wb_list_lock);
+ INIT_LIST_HEAD(&rdev->wb_list);
+ init_waitqueue_head(&rdev->wb_io_wait);
+ set_bit(WBCollisionCheck, &rdev->flags);
+
+ return 1;
+}
+
static struct ctl_table_header *raid_table_header;
static struct ctl_table raid_table[] = {
@@ -5597,6 +5610,32 @@ int md_run(struct mddev *mddev)
md_bitmap_destroy(mddev);
goto abort;
}
+
+ if (mddev->bitmap_info.max_write_behind > 0) {
+ bool creat_pool = false;
+
+ rdev_for_each(rdev, mddev) {
+ if (test_bit(WriteMostly, &rdev->flags) &&
+ rdev_init_wb(rdev))
+ creat_pool = true;
+ }
+ if (creat_pool && mddev->wb_info_pool == NULL) {
+ mddev->wb_info_pool =
+ mempool_create_kmalloc_pool(NR_WB_INFOS,
+ sizeof(struct wb_info));
+ if (!mddev->wb_info_pool) {
+ err = -ENOMEM;
+ mddev_detach(mddev);
+ if (mddev->private)
+ pers->free(mddev, mddev->private);
+ mddev->private = NULL;
+ module_put(pers->owner);
+ md_bitmap_destroy(mddev);
+ goto abort;
+ }
+ }
+ }
+
if (mddev->queue) {
bool nonrot = true;
@@ -5825,6 +5864,8 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->in_sync = 1;
md_update_sb(mddev, 1);
}
+ mempool_destroy(mddev->wb_info_pool);
+ mddev->wb_info_pool = NULL;
}
void md_stop_writes(struct mddev *mddev)
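
For completeness, the completion side that pairs with the check sketched above, again an illustrative reconstruction rather than the verbatim raid1.c change: the write-behind endio path drops the finished IO's range and wakes any writer blocked on wb_io_wait, which is also why __md_stop_writes() can safely destroy wb_info_pool once writes are quiesced.

/*
 * Sketch: on write-behind completion, remove the recorded [lo, hi)
 * range and wake writers waiting in the collision check.
 */
static void remove_wb(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
	int found = 0;
	unsigned long flags;
	struct wb_info *wi;
	struct mddev *mddev = rdev->mddev;

	spin_lock_irqsave(&rdev->wb_list_lock, flags);
	list_for_each_entry(wi, &rdev->wb_list, list) {
		if (hi == wi->hi && lo == wi->lo) {
			list_del(&wi->list);
			mempool_free(wi, mddev->wb_info_pool);
			found = 1;
			break;
		}
	}

	if (!found)
		WARN(1, "The write behind IO is not recorded\n");
	spin_unlock_irqrestore(&rdev->wb_list_lock, flags);
	wake_up(&rdev->wb_io_wait);
}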