author     Steve French <sfrench@us.ibm.com>    2008-05-06 19:55:32 +0200
committer  Steve French <sfrench@us.ibm.com>    2008-05-06 19:55:32 +0200
commit     a815752ac0ffdb910e92958d41d28f4fb28e5296 (patch)
tree       a3aa16a282354da0debe8e3a3a7ed8aac6e54001 /drivers/md
parent     [CIFS] fix typo (diff)
parent     Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rol... (diff)
download   linux-a815752ac0ffdb910e92958d41d28f4fb28e5296.tar.xz
           linux-a815752ac0ffdb910e92958d41d28f4fb28e5296.zip
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-emc.c         |   2
-rw-r--r--  drivers/md/dm-mpath-hp-sw.c |   1
-rw-r--r--  drivers/md/dm-mpath-rdac.c  |   1
-rw-r--r--  drivers/md/dm-table.c       |   5
-rw-r--r--  drivers/md/md.c             | 121
-rw-r--r--  drivers/md/raid1.c          |  27
-rw-r--r--  drivers/md/raid10.c         |  29
-rw-r--r--  drivers/md/raid5.c          |  33
8 files changed, 183 insertions, 36 deletions
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 6b91b9ab1d41..3ea5ad4b7805 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;
 
-	memset(&rq->cmd, 0, BLK_MAX_CDB);
-
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
index 204bf42c9449..b63a0ab37c53 100644
--- a/drivers/md/dm-mpath-hp-sw.c
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path)
 	req->sense = h->sense;
 	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
 
-	memset(&req->cmd, 0, BLK_MAX_CDB);
 	req->cmd[0] = START_STOP;
 	req->cmd[4] = 1;
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index e04eb5c697fb..95e77734880a 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h,
 		return NULL;
 	}
 
-	memset(&rq->cmd, 0, BLK_MAX_CDB);
 	rq->sense = h->sense;
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 51be53344214..94116eaf4709 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -873,10 +873,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
 	q->bounce_pfn = t->limits.bounce_pfn;
+
 	if (t->limits.no_cluster)
-		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 	else
-		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 87620b705bee..83eb78b00137 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -276,13 +276,15 @@ static mddev_t * mddev_find(dev_t unit)
 	init_waitqueue_head(&new->sb_wait);
 	new->reshape_position = MaxSector;
 	new->resync_max = MaxSector;
+	new->level = LEVEL_NONE;
 
 	new->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!new->queue) {
 		kfree(new);
 		return NULL;
 	}
-	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+	/* Can be unlocked because the queue is new: no concurrency */
+	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
 
 	blk_queue_make_request(new->queue, md_fail_request);
@@ -1368,6 +1370,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 		MD_BUG();
 		return -EINVAL;
 	}
+
+	/* prevent duplicates */
+	if (find_rdev(mddev, rdev->bdev->bd_dev))
+		return -EEXIST;
+
 	/* make sure rdev->size exceeds mddev->size */
 	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
 		if (mddev->pers) {
@@ -1651,6 +1658,8 @@ static void md_update_sb(mddev_t * mddev, int force_change)
 	int sync_req;
 	int nospares = 0;
 
+	if (mddev->external)
+		return;
 repeat:
 	spin_lock_irq(&mddev->write_lock);
@@ -1819,6 +1828,10 @@ state_show(mdk_rdev_t *rdev, char *page)
 		len += sprintf(page+len, "%swrite_mostly",sep);
 		sep = ",";
 	}
+	if (test_bit(Blocked, &rdev->flags)) {
+		len += sprintf(page+len, "%sblocked", sep);
+		sep = ",";
+	}
 	if (!test_bit(Faulty, &rdev->flags) &&
 	    !test_bit(In_sync, &rdev->flags)) {
 		len += sprintf(page+len, "%sspare", sep);
@@ -1835,6 +1848,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	 *  remove - disconnects the device
 	 *  writemostly - sets write_mostly
 	 *  -writemostly - clears write_mostly
+	 *  blocked - sets the Blocked flag
+	 *  -blocked - clears the Blocked flag
 	 */
 	int err = -EINVAL;
 	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -1857,6 +1872,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	} else if (cmd_match(buf, "-writemostly")) {
 		clear_bit(WriteMostly, &rdev->flags);
 		err = 0;
+	} else if (cmd_match(buf, "blocked")) {
+		set_bit(Blocked, &rdev->flags);
+		err = 0;
+	} else if (cmd_match(buf, "-blocked")) {
+		clear_bit(Blocked, &rdev->flags);
+		wake_up(&rdev->blocked_wait);
+		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+		md_wakeup_thread(rdev->mddev->thread);
+
+		err = 0;
 	}
 	return err ? err : len;
 }
@@ -2096,7 +2121,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 			rv = -EBUSY;
 		else
 			rv = entry->store(rdev, page, length);
-		mddev_unlock(rdev->mddev);
+		mddev_unlock(mddev);
 	}
 	return rv;
 }
@@ -2185,7 +2210,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 			goto abort_free;
 		}
 	}
+
 	INIT_LIST_HEAD(&rdev->same_set);
+	init_waitqueue_head(&rdev->blocked_wait);
 
 	return rdev;
@@ -2456,7 +2483,6 @@ resync_start_show(mddev_t *mddev, char *page)
 static ssize_t
 resync_start_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	/* can only set chunk_size if array is not yet active */
 	char *e;
 	unsigned long long n = simple_strtoull(buf, &e, 10);
@@ -2590,15 +2616,20 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			err = do_md_stop(mddev, 1);
 		else {
 			mddev->ro = 1;
+			set_disk_ro(mddev->gendisk, 1);
 			err = do_md_run(mddev);
 		}
 		break;
 	case read_auto:
-		/* stopping an active array */
 		if (mddev->pers) {
-			err = do_md_stop(mddev, 1);
-			if (err == 0)
-				mddev->ro = 2; /* FIXME mark devices writable */
+			if (mddev->ro != 1)
+				err = do_md_stop(mddev, 1);
+			else
+				err = restart_array(mddev);
+			if (err == 0) {
+				mddev->ro = 2;
+				set_disk_ro(mddev->gendisk, 0);
+			}
 		} else {
 			mddev->ro = 2;
 			err = do_md_run(mddev);
 		}
@@ -2611,6 +2642,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			if (atomic_read(&mddev->writes_pending) == 0) {
 				if (mddev->in_sync == 0) {
 					mddev->in_sync = 1;
+					if (mddev->safemode == 1)
+						mddev->safemode = 0;
 					if (mddev->persistent)
 						set_bit(MD_CHANGE_CLEAN, &mddev->flags);
@@ -2634,6 +2667,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			err = 0;
 		} else {
 			mddev->ro = 0;
+			set_disk_ro(mddev->gendisk, 0);
 			err = do_md_run(mddev);
 		}
 		break;
@@ -3711,6 +3745,30 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		mddev->reshape_position = MaxSector;
 		mddev->external = 0;
 		mddev->persistent = 0;
+		mddev->level = LEVEL_NONE;
+		mddev->clevel[0] = 0;
+		mddev->flags = 0;
+		mddev->ro = 0;
+		mddev->metadata_type[0] = 0;
+		mddev->chunk_size = 0;
+		mddev->ctime = mddev->utime = 0;
+		mddev->layout = 0;
+		mddev->max_disks = 0;
+		mddev->events = 0;
+		mddev->delta_disks = 0;
+		mddev->new_level = LEVEL_NONE;
+		mddev->new_layout = 0;
+		mddev->new_chunk = 0;
+		mddev->curr_resync = 0;
+		mddev->resync_mismatches = 0;
+		mddev->suspend_lo = mddev->suspend_hi = 0;
+		mddev->sync_speed_min = mddev->sync_speed_max = 0;
+		mddev->recovery = 0;
+		mddev->in_sync = 0;
+		mddev->changed = 0;
+		mddev->degraded = 0;
+		mddev->barriers_work = 0;
+		mddev->safemode = 0;
 	} else if (mddev->pers)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4918,6 +4976,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	if (!rdev || test_bit(Faulty, &rdev->flags))
 		return;
+
+	if (mddev->external)
+		set_bit(Blocked, &rdev->flags);
 /*
 	dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
 		mdname(mddev),
@@ -5364,6 +5425,8 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 		md_wakeup_thread(mddev->sync_thread);
 	}
 	atomic_inc(&mddev->writes_pending);
+	if (mddev->safemode == 1)
+		mddev->safemode = 0;
 	if (mddev->in_sync) {
 		spin_lock_irq(&mddev->write_lock);
 		if (mddev->in_sync) {
@@ -5718,7 +5781,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 
 	rdev_for_each(rdev, rtmp, mddev)
 		if (rdev->raid_disk >= 0 &&
-		    !mddev->external &&
+		    !test_bit(Blocked, &rdev->flags) &&
 		    (test_bit(Faulty, &rdev->flags) ||
 		     ! test_bit(In_sync, &rdev->flags)) &&
 		    atomic_read(&rdev->nr_pending)==0) {
@@ -5788,7 +5851,7 @@ void md_check_recovery(mddev_t *mddev)
 		return;
 
 	if (signal_pending(current)) {
-		if (mddev->pers->sync_request) {
+		if (mddev->pers->sync_request && !mddev->external) {
 			printk(KERN_INFO "md: %s in immediate safe mode\n",
 			       mdname(mddev));
 			mddev->safemode = 2;
@@ -5800,7 +5863,7 @@ void md_check_recovery(mddev_t *mddev)
 		(mddev->flags && !mddev->external) ||
 		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
 		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
-		(mddev->safemode == 1) ||
+		(mddev->external == 0 && mddev->safemode == 1) ||
 		(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
 		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
 		))
@@ -5809,16 +5872,20 @@ void md_check_recovery(mddev_t *mddev)
 
 	if (mddev_trylock(mddev)) {
 		int spares = 0;
 
-		spin_lock_irq(&mddev->write_lock);
-		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
-		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
-			mddev->in_sync = 1;
-			if (mddev->persistent)
-				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+		if (!mddev->external) {
+			spin_lock_irq(&mddev->write_lock);
+			if (mddev->safemode &&
+			    !atomic_read(&mddev->writes_pending) &&
+			    !mddev->in_sync &&
+			    mddev->recovery_cp == MaxSector) {
+				mddev->in_sync = 1;
+				if (mddev->persistent)
+					set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			}
+			if (mddev->safemode == 1)
+				mddev->safemode = 0;
+			spin_unlock_irq(&mddev->write_lock);
 		}
-		if (mddev->safemode == 1)
-			mddev->safemode = 0;
-		spin_unlock_irq(&mddev->write_lock);
 
 		if (mddev->flags)
 			md_update_sb(mddev, 0);
@@ -5913,6 +5980,16 @@ void md_check_recovery(mddev_t *mddev)
 	}
 }
 
+void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+	sysfs_notify(&rdev->kobj, NULL, "state");
+	wait_event_timeout(rdev->blocked_wait,
+			   !test_bit(Blocked, &rdev->flags),
+			   msecs_to_jiffies(5000));
+	rdev_dec_pending(rdev, mddev);
+}
+EXPORT_SYMBOL(md_wait_for_blocked_rdev);
+
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
@@ -5947,13 +6024,9 @@ static struct notifier_block md_notifier = {
 
 static void md_geninit(void)
 {
-	struct proc_dir_entry *p;
-
 	dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
 
-	p = create_proc_entry("mdstat", S_IRUGO, NULL);
-	if (p)
-		p->proc_fops = &md_seq_fops;
+	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
 }
 
 static int __init md_init(void)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9fd473a6dbf5..6778b7cb39bd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	r1bio_t *r1_bio;
 	struct bio *read_bio;
 	int i, targets = 0, disks;
-	mdk_rdev_t *rdev;
 	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	struct bio_list bl;
@@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	const int do_sync = bio_sync(bio);
 	int do_barriers;
+	mdk_rdev_t *blocked_rdev;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio)
 		first = 0;
 	}
 #endif
+ retry_write:
+	blocked_rdev = NULL;
 	rcu_read_lock();
 	for (i = 0;  i < disks; i++) {
-		if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
-		    !test_bit(Faulty, &rdev->flags)) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			atomic_inc(&rdev->nr_pending);
+			blocked_rdev = rdev;
+			break;
+		}
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			if (test_bit(Faulty, &rdev->flags)) {
 				rdev_dec_pending(rdev, mddev);
@@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		/* Wait for this device to become unblocked */
+		int j;
+
+		for (j = 0; j < i; j++)
+			if (r1_bio->bios[j])
+				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
+
+		allow_barrier(conf);
+		md_wait_for_blocked_rdev(blocked_rdev, mddev);
+		wait_barrier(conf);
+		goto retry_write;
+	}
+
 	BUG_ON(targets == 0); /* we never fail the last device */
 
 	if (targets < conf->raid_disks) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1e96aa3ff513..5938fa962922 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -790,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	const int do_sync = bio_sync(bio);
 	struct bio_list bl;
 	unsigned long flags;
+	mdk_rdev_t *blocked_rdev;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
@@ -879,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	/*
 	 * WRITE:
 	 */
-	/* first select target devices under spinlock and
+	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
 	 */
 	raid10_find_phys(conf, r10_bio);
+ retry_write:
+	blocked_rdev = 0;
 	rcu_read_lock();
 	for (i = 0;  i < conf->copies; i++) {
 		int d = r10_bio->devs[i].devnum;
 		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
-		if (rdev &&
-		    !test_bit(Faulty, &rdev->flags)) {
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			atomic_inc(&rdev->nr_pending);
+			blocked_rdev = rdev;
+			break;
+		}
+		if (rdev && !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			r10_bio->devs[i].bio = bio;
 		} else {
@@ -899,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		/* Have to wait for this device to get unblocked, then retry */
+		int j;
+		int d;
+
+		for (j = 0; j < i; j++)
+			if (r10_bio->devs[j].bio) {
+				d = r10_bio->devs[j].devnum;
+				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+			}
+		allow_barrier(conf);
+		md_wait_for_blocked_rdev(blocked_rdev, mddev);
+		wait_barrier(conf);
+		goto retry_write;
+	}
+
 	atomic_set(&r10_bio->remaining, 0);
 
 	bio_list_init(&bl);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 968dacaced6d..087eee0cb809 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2607,6 +2607,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 		}
 	}
+
 /*
  * handle_stripe - do things to a stripe.
  *
@@ -2632,6 +2633,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct stripe_head_state s;
 	struct r5dev *dev;
 	unsigned long pending = 0;
+	mdk_rdev_t *blocked_rdev = NULL;
 
 	memset(&s, 0, sizeof(s));
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2691,6 +2693,11 @@ static void handle_stripe5(struct stripe_head *sh)
 		if (dev->written)
 			s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			blocked_rdev = rdev;
+			atomic_inc(&rdev->nr_pending);
+			break;
+		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
 			clear_bit(R5_ReadError, &dev->flags);
@@ -2705,6 +2712,11 @@ static void handle_stripe5(struct stripe_head *sh)
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		set_bit(STRIPE_HANDLE, &sh->state);
+		goto unlock;
+	}
+
 	if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
 		sh->ops.count++;
@@ -2894,8 +2906,13 @@ static void handle_stripe5(struct stripe_head *sh)
 	if (sh->ops.count)
 		pending = get_stripe_work(sh);
 
+ unlock:
 	spin_unlock(&sh->lock);
 
+	/* wait for this device to become unblocked */
+	if (unlikely(blocked_rdev))
+		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
 	if (pending)
 		raid5_run_ops(sh, pending);
@@ -2912,6 +2929,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	struct stripe_head_state s;
 	struct r6_state r6s;
 	struct r5dev *dev, *pdev, *qdev;
+	mdk_rdev_t *blocked_rdev = NULL;
 
 	r6s.qd_idx = raid6_next_disk(pd_idx, disks);
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
@@ -2975,6 +2993,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		if (dev->written)
 			s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			blocked_rdev = rdev;
+			atomic_inc(&rdev->nr_pending);
+			break;
+		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
 			clear_bit(R5_ReadError, &dev->flags);
@@ -2989,6 +3012,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			set_bit(R5_Insync, &dev->flags);
 	}
 	rcu_read_unlock();
+
+	if (unlikely(blocked_rdev)) {
+		set_bit(STRIPE_HANDLE, &sh->state);
+		goto unlock;
+	}
 	pr_debug("locked=%d uptodate=%d to_read=%d"
 		" to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3094,8 +3122,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	    !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
 		handle_stripe_expansion(conf, sh, &r6s);
 
+ unlock:
 	spin_unlock(&sh->lock);
 
+	/* wait for this device to become unblocked */
+	if (unlikely(blocked_rdev))
+		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
 	return_io(return_bi);
 
 	for (i=disks; i-- ;) {
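
The thread tying most of the md hunks together is the new Blocked-rdev handshake: the raid1/raid10 write paths and the raid5/raid6 stripe handlers take a reference on a device found Blocked, back out any references already taken, call the new md_wait_for_blocked_rdev() (which waits up to five seconds and drops that reference), and retry. The following is a minimal user-space sketch of that control flow, not kernel code: struct rdev, wait_for_unblocked(), submit_write() and the pthread scaffolding are illustrative stand-ins for the kernel's mdk_rdev_t, wait queues and barriers; only the reference-counting discipline, the retry loop, and the 5-second timeout mirror the diff above.

/* blocked_retry.c -- user-space model of the Blocked-rdev retry pattern.
 * Build: cc -std=c11 blocked_retry.c -lpthread
 * All names here are hypothetical stand-ins for the kernel objects. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct rdev {                        /* stand-in for mdk_rdev_t */
        atomic_int nr_pending;       /* like rdev->nr_pending */
        atomic_bool blocked;         /* like the Blocked bit in rdev->flags */
        pthread_mutex_t lock;
        pthread_cond_t unblocked;    /* like rdev->blocked_wait */
};

/* Like md_wait_for_blocked_rdev(): wait at most 5 s for the device to be
 * unblocked, then drop the reference taken when Blocked was spotted. */
static void wait_for_unblocked(struct rdev *r)
{
        struct timespec deadline;
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 5;                   /* the diff's 5000 ms timeout */
        pthread_mutex_lock(&r->lock);
        while (atomic_load(&r->blocked))
                if (pthread_cond_timedwait(&r->unblocked, &r->lock, &deadline))
                        break;                  /* timed out: caller retries */
        pthread_mutex_unlock(&r->lock);
        atomic_fetch_sub(&r->nr_pending, 1);    /* like rdev_dec_pending() */
}

/* Like the raid1/raid10 write path: reference each target, and if one is
 * Blocked, back out the references taken so far, wait, and start over. */
static void submit_write(struct rdev *devs, int ndevs)
{
retry_write:
        for (int i = 0; i < ndevs; i++) {
                atomic_fetch_add(&devs[i].nr_pending, 1);
                if (atomic_load(&devs[i].blocked)) {
                        for (int j = 0; j < i; j++)
                                atomic_fetch_sub(&devs[j].nr_pending, 1);
                        /* the kernel also drops and retakes the resync
                         * barrier (allow_barrier()/wait_barrier()) here */
                        wait_for_unblocked(&devs[i]);
                        goto retry_write;
                }
        }
        puts("write dispatched to all mirrors");
        for (int i = 0; i < ndevs; i++)
                atomic_fetch_sub(&devs[i].nr_pending, 1);
}

int main(void)
{
        struct rdev devs[2];
        for (int i = 0; i < 2; i++) {
                atomic_init(&devs[i].nr_pending, 0);
                atomic_init(&devs[i].blocked, false);
                pthread_mutex_init(&devs[i].lock, NULL);
                pthread_cond_init(&devs[i].unblocked, NULL);
        }
        submit_write(devs, 2);  /* nothing blocked, so it dispatches at once */
        return 0;
}

The point of holding nr_pending across the wait is the same in both settings: as long as a writer holds a reference, the blocked device cannot be torn down, so clearing the flag (via "-blocked" in sysfs, which wakes blocked_wait) lets every stalled writer retry against a device that is still valid.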