diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 22:18:10 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 22:18:10 +0100 |
commit | 4526b710c1a31767044c3b1abb9f1f51e98bf49f (patch) | |
tree | c643e8085df6248c40af35c77ef9e88833bdb851 | |
parent | [media] vsp1: use proper dma alloc/free functions (diff) | |
parent | md/raid5: Cleanup cpu hotplug notifier (diff) | |
download | linux-4526b710c1a31767044c3b1abb9f1f51e98bf49f.tar.xz linux-4526b710c1a31767044c3b1abb9f1f51e98bf49f.zip |
Merge tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"This update mainly fixes bugs.
- a raid5 discard related fix from Jes
- a MD multipath bio clone fix from Ming
- raid1 error handling deadlock fix from Nate and corresponding
raid10 fix from myself
- a raid5 stripe batch fix from Neil
- a patch from Sebastian to avoid unnecessary uevent
- several cleanup/debug patches"
* tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
md/raid5: Cleanup cpu hotplug notifier
raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
md: fix typos for stipe
md/bitmap: remove redundant return in bitmap_checkpage
md/raid1: remove unnecessary BUG_ON
md: multipath: don't hardcopy bio in .make_request path
md/raid5: output stripe state for debug
md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
Update MD git tree URL
md/bitmap: remove redundant check
MD: warn for potential deadlock
md: Drop sending a change uevent when stopping
RAID5: revert e9e4c377e2f563 to fix a livelock
RAID5: check_reshape() shouldn't call mddev_suspend
md/raid5: Compare apples to apples (or sectors to sectors)
-rw-r--r-- | MAINTAINERS | 2 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 4 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 4 | ||||
-rw-r--r-- | drivers/md/md.c | 2 | ||||
-rw-r--r-- | drivers/md/multipath.c | 4 | ||||
-rw-r--r-- | drivers/md/raid1.c | 8 | ||||
-rw-r--r-- | drivers/md/raid10.c | 7 | ||||
-rw-r--r-- | drivers/md/raid5.c | 63 | ||||
-rw-r--r-- | drivers/md/raid5.h | 4 |
9 files changed, 58 insertions, 40 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 29d9779dc3d2..ce02830670fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10291,7 +10291,7 @@ F: drivers/media/pci/solo6x10/ SOFTWARE RAID (Multiple Disks) SUPPORT M: Shaohua Li <shli@kernel.org> L: linux-raid@vger.kernel.org -T: git git://neil.brown.name/md +T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git S: Supported F: drivers/md/ F: include/linux/raid/ diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index d80cce499a56..7df6b4f1548a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -98,7 +98,6 @@ __acquires(bitmap->lock) bitmap->bp[page].hijacked) { /* somebody beat us to getting the page */ kfree(mappage); - return 0; } else { /* no page was in place and we have one, so install it */ @@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) sb->chunksize = cpu_to_le32(chunksize); daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; - if (!daemon_sleep || - (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { + if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n"); daemon_sleep = 5 * HZ; } diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 7d5c3a610ca5..5e3fcd6ecf77 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -49,8 +49,8 @@ * When we set a bit, or in the counter (to start a write), if the fields is * 0, we first set the disk bit and set the counter to 1. * - * If the counter is 0, the on-disk bit is clear and the stipe is clean - * Anything that dirties the stipe pushes the counter to 2 (at least) + * If the counter is 0, the on-disk bit is clear and the stripe is clean + * Anything that dirties the stripe pushes the counter to 2 (at least) * and sets the on-disk bit (lazily). * If a periodic sweep find the counter at 2, it is decremented to 1. * If the sweep find the counter at 1, the on-disk bit is cleared and the diff --git a/drivers/md/md.c b/drivers/md/md.c index e55e6cf9ec17..c068f171b4eb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) */ void mddev_suspend(struct mddev *mddev) { + WARN_ON_ONCE(current == mddev->thread->tsk); if (mddev->suspended++) return; synchronize_rcu(); @@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode, export_array(mddev); md_clean(mddev); - kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); if (mddev->hold_active == UNTIL_STOP) mddev->hold_active = 0; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 0a72ab6e6c20..dd483bb2e111 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - mp_bh->bio = *bio; + bio_init(&mp_bh->bio); + __bio_clone_fast(&mp_bh->bio, bio); + mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4e3843f7d245..39fb21e048e6 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r1_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { @@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; - BUG_ON(sync_blocks < (PAGE_SIZE>>9)); if ((len >> 9) > sync_blocks) len = sync_blocks<<9; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1c1447dd3417..e3fd725d5c4d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b4f02c9959f2..8ab8b65e1741 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - unsigned long do_wakeup = 0; - int i = 0; + bool do_wakeup = false; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup |= 1 << hash; + do_wakeup = true; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - if (do_wakeup & (1 << i)) - wake_up(&conf->wait_for_stripe[i]); - } - if (do_wakeup) { + wake_up(&conf->wait_for_stripe); if (atomic_read(&conf->active_stripes) == 0) wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) @@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_exclusive_cmd( - conf->wait_for_stripe[hash], + wait_event_lock_irq( + conf->wait_for_stripe, !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - spin_unlock_irq(conf->hash_locks + hash), - spin_lock_irq(conf->hash_locks + hash)); + *(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); - if (!list_empty(conf->inactive_list + hash)) - wake_up(&conf->wait_for_stripe[hash]); - spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) unsigned long cpu; int err = 0; + /* + * Never shrink. And mddev_suspend() could deadlock if this is called + * from raid5d. In that case, scribble_disks and scribble_sectors + * should equal to new_disks and new_sectors + */ + if (conf->scribble_disks >= new_disks && + conf->scribble_sectors >= new_sectors) + return 0; mddev_suspend(conf->mddev); get_online_cpus(); for_each_present_cpu(cpu) { @@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) } put_online_cpus(); mddev_resume(conf->mddev); + if (!err) { + conf->scribble_disks = new_disks; + conf->scribble_sectors = new_sectors; + } return err; } @@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_exclusive_cmd(conf->wait_for_stripe[hash], + wait_event_cmd(conf->wait_for_stripe, !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, list_del_init(&sh->batch_list); - WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | + WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | (1 << STRIPE_SYNCING) | (1 << STRIPE_REPLACED) | - (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DELAYED) | (1 << STRIPE_BIT_DELAY) | (1 << STRIPE_FULL_WRITE) | @@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, (1 << STRIPE_DISCARD) | (1 << STRIPE_BATCH_READY) | (1 << STRIPE_BATCH_ERR) | - (1 << STRIPE_BITMAP_PENDING))); - WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | - (1 << STRIPE_REPLACED))); + (1 << STRIPE_BITMAP_PENDING)), + "stripe state: %lx\n", sh->state); + WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | + (1 << STRIPE_REPLACED)), + "head stripe state: %lx\n", head_sh->state); set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DEGRADED)), head_sh->state & (1 << STRIPE_INSYNC)); @@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, break; case CPU_DEAD: case CPU_DEAD_FROZEN: + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); break; default: @@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf) } put_online_cpus(); + if (!err) { + conf->scribble_disks = max(conf->raid_disks, + conf->previous_raid_disks); + conf->scribble_sectors = max(conf->chunk_sectors, + conf->prev_chunk_sectors); + } return err; } @@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) seqcount_init(&conf->gen_lock); mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_quiescent); - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - init_waitqueue_head(&conf->wait_for_stripe[i]); - } + init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); @@ -7014,8 +7025,8 @@ static int raid5_run(struct mddev *mddev) } if (discard_supported && - mddev->queue->limits.max_discard_sectors >= stripe && - mddev->queue->limits.discard_granularity >= stripe) + mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && + mddev->queue->limits.discard_granularity >= stripe) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index a415e1cd39b8..517d4b68a1be 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -510,6 +510,8 @@ struct r5conf { * conversions */ } __percpu *percpu; + int scribble_disks; + int scribble_sectors; #ifdef CONFIG_HOTPLUG_CPU struct notifier_block cpu_notify; #endif @@ -522,7 +524,7 @@ struct r5conf { atomic_t empty_inactive_list_nr; struct llist_head released_stripes; wait_queue_head_t wait_for_quiescent; - wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; + wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, |