From a852d7b8a0cf29499905c9243fa6d3fb93898b82 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Sep 2012 12:48:30 +1000 Subject: Revert "md/raid5: For odirect-write performance, do not set STRIPE_PREREAD_ACTIVE." This reverts commit 895e3c5c58a80bb9e4e05d9ac38b4f30e0f97d80. While this patch seemed like a good idea and did help some workloads, it hurts other workloads. Large sequential O_DIRECT writes were faster, Small random O_DIRECT writes were slower. Other changes (batching RAID5 writes) have improved the sequential writes using a different mechanism, so the net result of this patch is definitely negative. So revert it. Reported-by: Shaohua Li Tested-by: Jianpeng Ma Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index adda94df5eb2..380cb68856b1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4192,7 +4192,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); - if ((bi->bi_rw & REQ_NOIDLE) && + if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); release_stripe_plug(mddev, sh); -- cgit v1.2.3 From e5c86471f933608db5d43679f84cb4346c32033e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Sep 2012 12:52:30 +1000 Subject: md/raid5: fix calculate of 'degraded' when a replacement becomes active. When a replacement device becomes active, we mark the device that it replaces as 'faulty' so that it can subsequently get removed. However 'calc_degraded' only pays attention to the primary device, not the replacement, so the array appears to become degraded, which is wrong. So teach 'calc_degraded' to consider any replacement if a primary device is faulty. 
This is suitable for -stable as an incorrect 'degraded' value can confuse md and could lead to data corruption. This is only relevant for 3.3 and later. Cc: stable@vger.kernel.org Reported-by: Robin Hill Reported-by: John Drescher Signed-off-by: NeilBrown --- drivers/md/raid5.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 380cb68856b1..7031b865b3a0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -393,6 +393,8 @@ static int calc_degraded(struct r5conf *conf) degraded = 0; for (i = 0; i < conf->previous_raid_disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(Faulty, &rdev->flags)) + rdev = rcu_dereference(conf->disks[i].replacement); if (!rdev || test_bit(Faulty, &rdev->flags)) degraded++; else if (test_bit(In_sync, &rdev->flags)) @@ -417,6 +419,8 @@ static int calc_degraded(struct r5conf *conf) degraded2 = 0; for (i = 0; i < conf->raid_disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(Faulty, &rdev->flags)) + rdev = rcu_dereference(conf->disks[i].replacement); if (!rdev || test_bit(Faulty, &rdev->flags)) degraded2++; else if (test_bit(In_sync, &rdev->flags)) -- cgit v1.2.3 From 6dafab6b1383e912cd252fa809570b484eb6e0dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Sep 2012 12:54:22 +1000 Subject: md: make sure metadata is updated when spares are activated or removed. It isn't always necessary to update the metadata when spares are removed as the presence-or-not of a spare isn't really important to the integrity of an array. Also activating a spare doesn't always require updating the metadata as the update on 'recovery-completed' is usually sufficient. However the introduction of 'replacement' devices has made these transitions sometimes more important. 
For example the 'Replacement' flag isn't cleared until the original device is removed, so we need to ensure a metadata update after that 'spare' is removed. So set MD_CHANGE_DEVS whenever a spare is activated or removed, to complement the current situation where it is set when a spare is added or a device is failed (or a number of other less common situations). This is suitable for -stable as out-of-date metadata could lead to data corruption. This is only relevant for 3.3 and later (when 'replacement' was introduced). Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3f6203a4c7ea..308e87b417e0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7619,6 +7619,8 @@ static int remove_and_add_spares(struct mddev *mddev) } } } + if (removed) + set_bit(MD_CHANGE_DEVS, &mddev->flags); return spares; } @@ -7632,9 +7634,11 @@ static void reap_sync_thread(struct mddev *mddev) !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ /* activate any spares */ - if (mddev->pers->spare_active(mddev)) + if (mddev->pers->spare_active(mddev)) { sysfs_notify(&mddev->kobj, NULL, "degraded"); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + } } if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && mddev->pers->finish_reshape) -- cgit v1.2.3