From 6bfe0b499082fd3950429017cd8ebf2a6c458aa5 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Wed, 30 Apr 2008 00:52:32 -0700
Subject: md: support blocking writes to an array on device failure

Allows a userspace metadata handler to take action upon detecting a device
failure.

Based on an original patch by Neil Brown.

Changes:
- added blocked_wait waitqueue to rdev
- don't qualify Blocked with Faulty; always let userspace block writes
- added md_wait_for_blocked_rdev to wait for the block device to be clear;
  if userspace misses the notification another one is sent every 5 seconds
- set MD_RECOVERY_NEEDED after clearing "blocked"
- kill DoBlock flag, just test mddev->external

Signed-off-by: Dan Williams
Signed-off-by: Neil Brown
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/md/raid1.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9fd473a6dbf5..6778b7cb39bd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	r1bio_t *r1_bio;
 	struct bio *read_bio;
 	int i, targets = 0, disks;
-	mdk_rdev_t *rdev;
 	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	struct bio_list bl;
@@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const int do_sync = bio_sync(bio);
 	int do_barriers;
+	mdk_rdev_t *blocked_rdev;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		first = 0;
 	}
 #endif
+ retry_write:
+	blocked_rdev = NULL;
 	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
-		if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
-		    !test_bit(Faulty, &rdev->flags)) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			atomic_inc(&rdev->nr_pending);
+			blocked_rdev = rdev;
+			break;
+		}
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			if (test_bit(Faulty, &rdev->flags)) {
 				rdev_dec_pending(rdev, mddev);
@@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		/* Wait for this device to become unblocked */
+		int j;
+
+		for (j = 0; j < i; j++)
+			if (r1_bio->bios[j])
+				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
+
+		allow_barrier(conf);
+		md_wait_for_blocked_rdev(blocked_rdev, mddev);
+		wait_barrier(conf);
+		goto retry_write;
+	}
+
 	BUG_ON(targets == 0); /* we never fail the last device */
 
 	if (targets < conf->raid_disks) {
--
cgit v1.2.3
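The helper the new retry path calls, md_wait_for_blocked_rdev(), is added to
drivers/md/md.c by the same series and so does not appear in this
raid1.c-limited listing. A rough sketch of its behaviour, inferred from the
changelog above (notify userspace via sysfs, wait on the new
rdev->blocked_wait queue, re-notify every 5 seconds until the Blocked bit is
cleared, then drop the reference the caller took), might look like the
following; the real md.c code may structure the 5-second re-notification
differently, for example by relying on the caller's retry_write loop:

/* Sketch only: behaviour inferred from the changelog, not taken from md.c. */
void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
	do {
		/* poke the userspace metadata handler via the rdev "state" file */
		sysfs_notify(&rdev->kobj, NULL, "state");

		/* sleep until userspace clears Blocked, re-notifying every 5s */
		wait_event_timeout(rdev->blocked_wait,
				   !test_bit(Blocked, &rdev->flags),
				   msecs_to_jiffies(5000));
	} while (test_bit(Blocked, &rdev->flags));

	/* drop the nr_pending reference the caller took on the blocked rdev */
	rdev_dec_pending(rdev, mddev);
}

On the userspace side, a metadata handler is expected to record the failure
and then clear the block by writing "-blocked" to the member device's
md/dev-YYY/state attribute under /sys/block/mdX/ (the same attribute accepts
"blocked" to set the flag); per the changelog, clearing it also sets
MD_RECOVERY_NEEDED so the array re-evaluates the failed device.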
From e7e72bf641b1fc7b9df6f40bd2c36dfccd8d647c Mon Sep 17 00:00:00 2001
From: Neil Brown
Date: Wed, 14 May 2008 16:05:54 -0700
Subject: Remove blkdev warning triggered by using md

As setting and clearing queue flags now requires that we hold a spinlock
on the queue, and as blk_queue_stack_limits is called without that lock,
get the lock inside blk_queue_stack_limits.

For blk_queue_stack_limits to be able to find the right lock, each md
personality needs to set q->queue_lock to point to the appropriate lock.
Those personalities which didn't previously use a spin_lock now use
q->__queue_lock, so always initialise that lock when the queue is allocated.

With this in place, setting/clearing of the QUEUE_FLAG_PLUGGED bit will no
longer cause warnings, as it will be clear that the proper lock is held.

Thanks to Dan Williams for review and fixing the silly bugs.

Signed-off-by: NeilBrown
Cc: Dan Williams
Cc: Jens Axboe
Cc: Alistair John Strachan
Cc: Nick Piggin
Cc: "Rafael J. Wysocki"
Cc: Jacek Luczak
Cc: Prakash Punnoor
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 block/blk-core.c       | 5 ++---
 block/blk-settings.c   | 8 +++++++-
 drivers/md/linear.c    | 1 +
 drivers/md/multipath.c | 1 +
 drivers/md/raid0.c     | 1 +
 drivers/md/raid1.c     | 4 +++-
 drivers/md/raid10.c    | 4 +++-
 drivers/md/raid5.c     | 1 +
 8 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 2987fe47b5ee..6a9cc0d22a61 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -482,6 +482,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
 	mutex_init(&q->sysfs_lock);
+	spin_lock_init(&q->__queue_lock);
 
 	return q;
 }
@@ -544,10 +545,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	 * if caller didn't supply a lock, they get per-queue locking with
 	 * our embedded lock
 	 */
-	if (!lock) {
-		spin_lock_init(&q->__queue_lock);
+	if (!lock)
 		lock = &q->__queue_lock;
-	}
 
 	q->request_fn = rfn;
 	q->prep_rq_fn = NULL;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index bb93d4c32775..8dd86418f35d 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -286,8 +286,14 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
 	t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
 	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
-	if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+	if (!t->queue_lock)
+		WARN_ON_ONCE(1);
+	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+		unsigned long flags;
+		spin_lock_irqsave(t->queue_lock, flags);
 		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+		spin_unlock_irqrestore(t->queue_lock, flags);
+	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
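For context, the warning this patch removes comes from the non-atomic
queue-flag helpers merged shortly before it. Paraphrased from
include/linux/blkdev.h of that era (this is surrounding context, not part of
the patch, and only approximate), they look roughly like:

/*
 * Paraphrased context, not part of this patch: approximate form of the
 * queue-flag helpers after "block: make queue flags non-atomic".
 */
static inline int queue_is_locked(struct request_queue *q)
{
#ifdef CONFIG_SMP
	spinlock_t *lock = q->queue_lock;	/* was NULL for md queues before this patch */
	return lock && spin_is_locked(lock);
#else
	return 1;
#endif
}

static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	WARN_ON_ONCE(!queue_is_locked(q));	/* the warning md users were seeing */
	__clear_bit(flag, &q->queue_flags);
}

With q->queue_lock left NULL, queue_is_locked() reports the queue as
unlocked, so flag updates on an md queue trip the warning. Hence the
blk-settings.c hunk above takes the lock around queue_flag_clear() (and warns
once if no lock was ever assigned), while the md hunks below make each
personality point queue_lock at a lock it genuinely holds when it plugs and
unplugs the queue.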
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0b8511776b3e..10748240cb2f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
 
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 	conf = linear_conf(mddev, mddev->raid_disks);
 
 	if (!conf)
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 42ee1a2dc144..4f4d1f383842 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -417,6 +417,7 @@ static int multipath_run (mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in multipath_run(),
 	 * should be freed in multipath_stop()]
 	 */
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 
 	conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
 	mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 818b48284096..914c04ddec7c 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev)
 		(mddev->chunk_size>>1)-1);
 	blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
 	blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 
 	conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
 	if (!conf)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6778b7cb39bd..ac409b7d83f5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1935,6 +1935,9 @@ static int run(mddev_t *mddev)
 	if (!conf->r1bio_pool)
 		goto out_no_mem;
 
+	spin_lock_init(&conf->device_lock);
+	mddev->queue->queue_lock = &conf->device_lock;
+
 	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
@@ -1958,7 +1961,6 @@ static int run(mddev_t *mddev)
 	}
 	conf->raid_disks = mddev->raid_disks;
 	conf->mddev = mddev;
-	spin_lock_init(&conf->device_lock);
 	INIT_LIST_HEAD(&conf->retry_list);
 
 	spin_lock_init(&conf->resync_lock);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index faf3d8912979..8536ede1e712 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2082,6 +2082,9 @@ static int run(mddev_t *mddev)
 		goto out_free_conf;
 	}
 
+	spin_lock_init(&conf->device_lock);
+	mddev->queue->queue_lock = &conf->device_lock;
+
 	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
@@ -2103,7 +2106,6 @@ static int run(mddev_t *mddev)
 		disk->head_position = 0;
 	}
 
-	spin_lock_init(&conf->device_lock);
 	INIT_LIST_HEAD(&conf->retry_list);
 
 	spin_lock_init(&conf->resync_lock);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ee0ea9183080..93fde48c0f42 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4257,6 +4257,7 @@ static int run(mddev_t *mddev)
 		goto abort;
 	}
 	spin_lock_init(&conf->device_lock);
+	mddev->queue->queue_lock = &conf->device_lock;
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
--
cgit v1.2.3
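Taken together, the md hunks fall into two patterns. A condensed, hypothetical
sketch of what a personality's run() now needs to do (example_run and
example_conf_t are illustrative names, not from the patch):

/*
 * Illustrative only: example_run()/example_conf_t are hypothetical.
 * raid1, raid10 and raid5 already take conf->device_lock around queue
 * plugging, so they point the queue at that lock; linear, raid0 and
 * multipath have no private lock and fall back to the queue's embedded
 * __queue_lock, which blk_alloc_queue_node() now always initialises.
 */
static int example_run(mddev_t *mddev)
{
	example_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);

	if (!conf)
		return -ENOMEM;

	spin_lock_init(&conf->device_lock);
	/* must be assigned before blk_queue_stack_limits() runs on this queue */
	mddev->queue->queue_lock = &conf->device_lock;
	/* or, for a personality with no private lock:
	 *	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
	 */

	mddev->private = conf;
	return 0;
}

This is also why the spin_lock_init(&conf->device_lock) calls in raid1 and
raid10 move ahead of the rdev_for_each() loop: that loop stacks limits from
each member device, and blk_queue_stack_limits() now expects t->queue_lock to
already be valid.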