summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: Steve French <sfrench@us.ibm.com>, 2008-05-06 19:55:32 +0200
committer: Steve French <sfrench@us.ibm.com>, 2008-05-06 19:55:32 +0200
commit: a815752ac0ffdb910e92958d41d28f4fb28e5296 (patch)
tree: a3aa16a282354da0debe8e3a3a7ed8aac6e54001 /drivers/md
parent: [CIFS] fix typo (diff)
parent: Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rol... (diff)
download: linux-a815752ac0ffdb910e92958d41d28f4fb28e5296.tar.xz
download: linux-a815752ac0ffdb910e92958d41d28f4fb28e5296.zip
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-emc.c            2
-rw-r--r--  drivers/md/dm-mpath-hp-sw.c    1
-rw-r--r--  drivers/md/dm-mpath-rdac.c     1
-rw-r--r--  drivers/md/dm-table.c          5
-rw-r--r--  drivers/md/md.c              121
-rw-r--r--  drivers/md/raid1.c            27
-rw-r--r--  drivers/md/raid10.c           29
-rw-r--r--  drivers/md/raid5.c            33
8 files changed, 183 insertions, 36 deletions
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 6b91b9ab1d41..3ea5ad4b7805 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h,
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
rq->sense_len = 0;
- memset(&rq->cmd, 0, BLK_MAX_CDB);
-
rq->timeout = EMC_FAILOVER_TIMEOUT;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
index 204bf42c9449..b63a0ab37c53 100644
--- a/drivers/md/dm-mpath-hp-sw.c
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path)
req->sense = h->sense;
memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
- memset(&req->cmd, 0, BLK_MAX_CDB);
req->cmd[0] = START_STOP;
req->cmd[4] = 1;
req->cmd_len = COMMAND_SIZE(req->cmd[0]);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index e04eb5c697fb..95e77734880a 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h,
return NULL;
}
- memset(&rq->cmd, 0, BLK_MAX_CDB);
rq->sense = h->sense;
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
rq->sense_len = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 51be53344214..94116eaf4709 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -873,10 +873,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
q->max_hw_sectors = t->limits.max_hw_sectors;
q->seg_boundary_mask = t->limits.seg_boundary_mask;
q->bounce_pfn = t->limits.bounce_pfn;
+
if (t->limits.no_cluster)
- q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+ queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
else
- q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 87620b705bee..83eb78b00137 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -276,13 +276,15 @@ static mddev_t * mddev_find(dev_t unit)
init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector;
new->resync_max = MaxSector;
+ new->level = LEVEL_NONE;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
kfree(new);
return NULL;
}
- set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+ /* Can be unlocked because the queue is new: no concurrency */
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
blk_queue_make_request(new->queue, md_fail_request);
@@ -1368,6 +1370,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
MD_BUG();
return -EINVAL;
}
+
+ /* prevent duplicates */
+ if (find_rdev(mddev, rdev->bdev->bd_dev))
+ return -EEXIST;
+
/* make sure rdev->size exceeds mddev->size */
if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
if (mddev->pers) {
@@ -1651,6 +1658,8 @@ static void md_update_sb(mddev_t * mddev, int force_change)
int sync_req;
int nospares = 0;
+ if (mddev->external)
+ return;
repeat:
spin_lock_irq(&mddev->write_lock);
@@ -1819,6 +1828,10 @@ state_show(mdk_rdev_t *rdev, char *page)
len += sprintf(page+len, "%swrite_mostly",sep);
sep = ",";
}
+ if (test_bit(Blocked, &rdev->flags)) {
+ len += sprintf(page+len, "%sblocked", sep);
+ sep = ",";
+ }
if (!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags)) {
len += sprintf(page+len, "%sspare", sep);
@@ -1835,6 +1848,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
* remove - disconnects the device
* writemostly - sets write_mostly
* -writemostly - clears write_mostly
+ * blocked - sets the Blocked flag
+ * -blocked - clears the Blocked flag
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -1857,6 +1872,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
} else if (cmd_match(buf, "-writemostly")) {
clear_bit(WriteMostly, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "blocked")) {
+ set_bit(Blocked, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-blocked")) {
+ clear_bit(Blocked, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+
+ err = 0;
}
return err ? err : len;
}
@@ -2096,7 +2121,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
rv = -EBUSY;
else
rv = entry->store(rdev, page, length);
- mddev_unlock(rdev->mddev);
+ mddev_unlock(mddev);
}
return rv;
}
@@ -2185,7 +2210,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
goto abort_free;
}
}
+
INIT_LIST_HEAD(&rdev->same_set);
+ init_waitqueue_head(&rdev->blocked_wait);
return rdev;
@@ -2456,7 +2483,6 @@ resync_start_show(mddev_t *mddev, char *page)
static ssize_t
resync_start_store(mddev_t *mddev, const char *buf, size_t len)
{
- /* can only set chunk_size if array is not yet active */
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
@@ -2590,15 +2616,20 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
err = do_md_stop(mddev, 1);
else {
mddev->ro = 1;
+ set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
- /* stopping an active array */
if (mddev->pers) {
- err = do_md_stop(mddev, 1);
- if (err == 0)
- mddev->ro = 2; /* FIXME mark devices writable */
+ if (mddev->ro != 1)
+ err = do_md_stop(mddev, 1);
+ else
+ err = restart_array(mddev);
+ if (err == 0) {
+ mddev->ro = 2;
+ set_disk_ro(mddev->gendisk, 0);
+ }
} else {
mddev->ro = 2;
err = do_md_run(mddev);
@@ -2611,6 +2642,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
if (atomic_read(&mddev->writes_pending) == 0) {
if (mddev->in_sync == 0) {
mddev->in_sync = 1;
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->persistent)
set_bit(MD_CHANGE_CLEAN,
&mddev->flags);
@@ -2634,6 +2667,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
err = 0;
} else {
mddev->ro = 0;
+ set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
break;
@@ -3711,6 +3745,30 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->reshape_position = MaxSector;
mddev->external = 0;
mddev->persistent = 0;
+ mddev->level = LEVEL_NONE;
+ mddev->clevel[0] = 0;
+ mddev->flags = 0;
+ mddev->ro = 0;
+ mddev->metadata_type[0] = 0;
+ mddev->chunk_size = 0;
+ mddev->ctime = mddev->utime = 0;
+ mddev->layout = 0;
+ mddev->max_disks = 0;
+ mddev->events = 0;
+ mddev->delta_disks = 0;
+ mddev->new_level = LEVEL_NONE;
+ mddev->new_layout = 0;
+ mddev->new_chunk = 0;
+ mddev->curr_resync = 0;
+ mddev->resync_mismatches = 0;
+ mddev->suspend_lo = mddev->suspend_hi = 0;
+ mddev->sync_speed_min = mddev->sync_speed_max = 0;
+ mddev->recovery = 0;
+ mddev->in_sync = 0;
+ mddev->changed = 0;
+ mddev->degraded = 0;
+ mddev->barriers_work = 0;
+ mddev->safemode = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4918,6 +4976,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
+
+ if (mddev->external)
+ set_bit(Blocked, &rdev->flags);
/*
dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
mdname(mddev),
@@ -5364,6 +5425,8 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
md_wakeup_thread(mddev->sync_thread);
}
atomic_inc(&mddev->writes_pending);
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->in_sync) {
spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) {
@@ -5718,7 +5781,7 @@ static int remove_and_add_spares(mddev_t *mddev)
rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
- !mddev->external &&
+ !test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
@@ -5788,7 +5851,7 @@ void md_check_recovery(mddev_t *mddev)
return;
if (signal_pending(current)) {
- if (mddev->pers->sync_request) {
+ if (mddev->pers->sync_request && !mddev->external) {
printk(KERN_INFO "md: %s in immediate safe mode\n",
mdname(mddev));
mddev->safemode = 2;
@@ -5800,7 +5863,7 @@ void md_check_recovery(mddev_t *mddev)
(mddev->flags && !mddev->external) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- (mddev->safemode == 1) ||
+ (mddev->external == 0 && mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
))
@@ -5809,16 +5872,20 @@ void md_check_recovery(mddev_t *mddev)
if (mddev_trylock(mddev)) {
int spares = 0;
- spin_lock_irq(&mddev->write_lock);
- if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
- !mddev->in_sync && mddev->recovery_cp == MaxSector) {
- mddev->in_sync = 1;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ if (!mddev->external) {
+ spin_lock_irq(&mddev->write_lock);
+ if (mddev->safemode &&
+ !atomic_read(&mddev->writes_pending) &&
+ !mddev->in_sync &&
+ mddev->recovery_cp == MaxSector) {
+ mddev->in_sync = 1;
+ if (mddev->persistent)
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ }
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
+ spin_unlock_irq(&mddev->write_lock);
}
- if (mddev->safemode == 1)
- mddev->safemode = 0;
- spin_unlock_irq(&mddev->write_lock);
if (mddev->flags)
md_update_sb(mddev, 0);
@@ -5913,6 +5980,16 @@ void md_check_recovery(mddev_t *mddev)
}
}
+void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+ sysfs_notify(&rdev->kobj, NULL, "state");
+ wait_event_timeout(rdev->blocked_wait,
+ !test_bit(Blocked, &rdev->flags),
+ msecs_to_jiffies(5000));
+ rdev_dec_pending(rdev, mddev);
+}
+EXPORT_SYMBOL(md_wait_for_blocked_rdev);
+
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
@@ -5947,13 +6024,9 @@ static struct notifier_block md_notifier = {
static void md_geninit(void)
{
- struct proc_dir_entry *p;
-
dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
- p = create_proc_entry("mdstat", S_IRUGO, NULL);
- if (p)
- p->proc_fops = &md_seq_fops;
+ proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
static int __init md_init(void)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9fd473a6dbf5..6778b7cb39bd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
r1bio_t *r1_bio;
struct bio *read_bio;
int i, targets = 0, disks;
- mdk_rdev_t *rdev;
struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
struct bio_list bl;
@@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
const int rw = bio_data_dir(bio);
const int do_sync = bio_sync(bio);
int do_barriers;
+ mdk_rdev_t *blocked_rdev;
/*
* Register the new request and wait if the reconstruction
@@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio)
first = 0;
}
#endif
+ retry_write:
+ blocked_rdev = NULL;
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
- !test_bit(Faulty, &rdev->flags)) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
rdev_dec_pending(rdev, mddev);
@@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Wait for this device to become unblocked */
+ int j;
+
+ for (j = 0; j < i; j++)
+ if (r1_bio->bios[j])
+ rdev_dec_pending(conf->mirrors[j].rdev, mddev);
+
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
BUG_ON(targets == 0); /* we never fail the last device */
if (targets < conf->raid_disks) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1e96aa3ff513..5938fa962922 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -790,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
const int do_sync = bio_sync(bio);
struct bio_list bl;
unsigned long flags;
+ mdk_rdev_t *blocked_rdev;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
@@ -879,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio)
/*
* WRITE:
*/
- /* first select target devices under spinlock and
+ /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
*/
raid10_find_phys(conf, r10_bio);
+ retry_write:
+ blocked_rdev = 0;
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev &&
- !test_bit(Faulty, &rdev->flags)) {
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else {
@@ -899,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Have to wait for this device to get unblocked, then retry */
+ int j;
+ int d;
+
+ for (j = 0; j < i; j++)
+ if (r10_bio->devs[j].bio) {
+ d = r10_bio->devs[j].devnum;
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ }
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
atomic_set(&r10_bio->remaining, 0);
bio_list_init(&bl);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 968dacaced6d..087eee0cb809 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2607,6 +2607,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
}
}
+
/*
* handle_stripe - do things to a stripe.
*
@@ -2632,6 +2633,7 @@ static void handle_stripe5(struct stripe_head *sh)
struct stripe_head_state s;
struct r5dev *dev;
unsigned long pending = 0;
+ mdk_rdev_t *blocked_rdev = NULL;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2691,6 +2693,11 @@ static void handle_stripe5(struct stripe_head *sh)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ break;
+ }
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -2705,6 +2712,11 @@ static void handle_stripe5(struct stripe_head *sh)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+
if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
sh->ops.count++;
@@ -2894,8 +2906,13 @@ static void handle_stripe5(struct stripe_head *sh)
if (sh->ops.count)
pending = get_stripe_work(sh);
+ unlock:
spin_unlock(&sh->lock);
+ /* wait for this device to become unblocked */
+ if (unlikely(blocked_rdev))
+ md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
if (pending)
raid5_run_ops(sh, pending);
@@ -2912,6 +2929,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
struct stripe_head_state s;
struct r6_state r6s;
struct r5dev *dev, *pdev, *qdev;
+ mdk_rdev_t *blocked_rdev = NULL;
r6s.qd_idx = raid6_next_disk(pd_idx, disks);
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
@@ -2975,6 +2993,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ break;
+ }
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -2989,6 +3012,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
set_bit(R5_Insync, &dev->flags);
}
rcu_read_unlock();
+
+ if (unlikely(blocked_rdev)) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3094,8 +3122,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
handle_stripe_expansion(conf, sh, &r6s);
+ unlock:
spin_unlock(&sh->lock);
+ /* wait for this device to become unblocked */
+ if (unlikely(blocked_rdev))
+ md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
return_io(return_bi);
for (i=disks; i-- ;) {