diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 95 |
1 files changed, 60 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index c068f171b4eb..866825f10b4c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -284,6 +284,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) * go away inside make_request */ sectors = bio_sectors(bio); + /* bio could be mergeable after passing to underlayer */ + bio->bi_rw &= ~REQ_NOMERGE; mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); @@ -305,7 +307,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) */ void mddev_suspend(struct mddev *mddev) { - WARN_ON_ONCE(current == mddev->thread->tsk); + WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk); if (mddev->suspended++) return; synchronize_rcu(); @@ -718,6 +720,7 @@ static void super_written(struct bio *bio) if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); + rdev_dec_pending(rdev, mddev); bio_put(bio); } @@ -732,6 +735,8 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, */ struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); + atomic_inc(&rdev->nr_pending); + bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev; bio->bi_iter.bi_sector = sector; bio_add_page(bio, page, size, 0); @@ -2286,19 +2291,24 @@ void md_update_sb(struct mddev *mddev, int force_change) return; } +repeat: if (mddev_is_clustered(mddev)) { if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; + if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) + nospares = 1; ret = md_cluster_ops->metadata_update_start(mddev); /* Has someone else has updated the sb */ if (!does_sb_need_changing(mddev)) { if (ret == 0) md_cluster_ops->metadata_update_cancel(mddev); - clear_bit(MD_CHANGE_PENDING, &mddev->flags); + bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING), + BIT(MD_CHANGE_DEVS) | + BIT(MD_CHANGE_CLEAN)); return; } } -repeat: + /* First make sure individual recovery_offsets are correct */ rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && @@ -2425,15 +2435,14 @@ repeat: md_super_wait(mddev); /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */ - spin_lock(&mddev->lock); + if (mddev_is_clustered(mddev) && ret == 0) + md_cluster_ops->metadata_update_finish(mddev); + if (mddev->in_sync != sync_req || - test_bit(MD_CHANGE_DEVS, &mddev->flags)) { + !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING), + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN))) /* have to write it out again */ - spin_unlock(&mddev->lock); goto repeat; - } - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - spin_unlock(&mddev->lock); wake_up(&mddev->sb_wait); if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); @@ -2447,9 +2456,6 @@ repeat: clear_bit(BlockedBadBlocks, &rdev->flags); wake_up(&rdev->blocked_wait); } - - if (mddev_is_clustered(mddev) && ret == 0) - md_cluster_ops->metadata_update_finish(mddev); } EXPORT_SYMBOL(md_update_sb); @@ -4811,6 +4817,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) if (err) return err; + /* cluster raid doesn't support change array_sectors */ + if (mddev_is_clustered(mddev)) + return -EINVAL; + if (strncmp(buf, "default", 7) == 0) { if (mddev->pers) sectors = mddev->pers->size(mddev, 0, 0); @@ -5034,7 +5044,7 @@ static int md_alloc(dev_t dev, char *name) disk->fops = &md_fops; disk->private_data = mddev; disk->queue = mddev->queue; - blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(mddev->queue, true, true); /* Allow extended partitions. This makes the * 'mdp' device redundant, but we can't really * remove it now. @@ -6432,6 +6442,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) int rv; int fit = (num_sectors == 0); + /* cluster raid doesn't support update size */ + if (mddev_is_clustered(mddev)) + return -EINVAL; + if (mddev->pers->resize == NULL) return -EINVAL; /* The "num_sectors" is the number of sectors of each device that @@ -6883,7 +6897,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, case ADD_NEW_DISK: /* We can support ADD_NEW_DISK on read-only arrays - * on if we are re-adding a preexisting device. + * only if we are re-adding a preexisting device. * So require mddev->pers and MD_DISK_SYNC. */ if (mddev->pers) { @@ -7780,7 +7794,7 @@ void md_do_sync(struct md_thread *thread) struct md_rdev *rdev; char *desc, *action = NULL; struct blk_plug plug; - bool cluster_resync_finished = false; + int ret; /* just incase thread restarts... */ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) @@ -7790,6 +7804,19 @@ void md_do_sync(struct md_thread *thread) return; } + if (mddev_is_clustered(mddev)) { + ret = md_cluster_ops->resync_start(mddev); + if (ret) + goto skip; + + if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) || + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) + && ((unsigned long long)mddev->curr_resync_completed + < (unsigned long long)mddev->resync_max_sectors)) + goto skip; + } + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) { desc = "data-check"; @@ -8084,11 +8111,6 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync_completed = mddev->curr_resync; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - /* tell personality and other nodes that we are finished */ - if (mddev_is_clustered(mddev)) { - md_cluster_ops->resync_finish(mddev); - cluster_resync_finished = true; - } mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && @@ -8125,12 +8147,18 @@ void md_do_sync(struct md_thread *thread) } } skip: - set_bit(MD_CHANGE_DEVS, &mddev->flags); - if (mddev_is_clustered(mddev) && - test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - !cluster_resync_finished) + ret == 0) { + /* set CHANGE_PENDING here since maybe another + * update is needed, so other nodes are informed */ + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS)); + md_wakeup_thread(mddev->thread); + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); md_cluster_ops->resync_finish(mddev); + } else + set_bit(MD_CHANGE_DEVS, &mddev->flags); spin_lock(&mddev->lock); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -8221,18 +8249,9 @@ static void md_start_sync(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, del_work); int ret = 0; - if (mddev_is_clustered(mddev)) { - ret = md_cluster_ops->resync_start(mddev); - if (ret) { - mddev->sync_thread = NULL; - goto out; - } - } - mddev->sync_thread = md_register_thread(md_do_sync, mddev, "resync"); -out: if (!mddev->sync_thread) { if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) printk(KERN_ERR "%s: could not start resync" @@ -8531,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape); int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, int is_new) { + struct mddev *mddev = rdev->mddev; int rv; if (is_new) s += rdev->new_data_offset; @@ -8540,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, if (rv == 0) { /* Make sure they get written out promptly */ sysfs_notify_dirent_safe(rdev->sysfs_state); - set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags); - set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags); + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING)); md_wakeup_thread(rdev->mddev->thread); return 1; } else @@ -8675,6 +8695,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) ret = remove_and_add_spares(mddev, rdev2); pr_info("Activated spare: %s\n", bdevname(rdev2->bdev,b)); + /* wakeup mddev->thread here, so array could + * perform resync with the new activated disk */ + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } /* device faulty * We just want to do the minimum to mark the disk |