summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c256
1 files changed, 131 insertions, 125 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3c3626d2a1f9..f9f991e6e138 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
- int previous, int noblock)
+ int previous, int noblock, int noquiesce)
{
struct stripe_head *sh;
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
do {
wait_event_lock_irq(conf->wait_for_stripe,
- conf->quiesce == 0,
+ conf->quiesce == 0 || noquiesce,
conf->device_lock, /* nothing */);
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
@@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
sector_t new_sector;
int algorithm = previous ? conf->prev_algo
: conf->algorithm;
- int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+ : conf->chunk_sectors;
int raid_disks = previous ? conf->previous_raid_disks
: conf->raid_disks;
int data_disks = raid_disks - conf->max_degraded;
@@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
int raid_disks = sh->disks;
int data_disks = raid_disks - conf->max_degraded;
sector_t new_sector = sh->sector, check;
- int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+ : conf->chunk_sectors;
int algorithm = previous ? conf->prev_algo
: conf->algorithm;
sector_t stripe;
@@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
struct stripe_head *sh)
{
int sectors_per_chunk =
- previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
int dd_idx;
int chunk_offset = sector_div(stripe, sectors_per_chunk);
int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
@@ -2671,7 +2670,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
sector_t bn = compute_blocknr(sh, i, 1);
sector_t s = raid5_compute_sector(conf, bn, 0,
&dd_idx, NULL);
- sh2 = get_active_stripe(conf, s, 0, 1);
+ sh2 = get_active_stripe(conf, s, 0, 1, 1);
if (sh2 == NULL)
/* so far only the early blocks of this stripe
* have been requested. When later blocks
@@ -2944,7 +2943,7 @@ static bool handle_stripe5(struct stripe_head *sh)
/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
struct stripe_head *sh2
- = get_active_stripe(conf, sh->sector, 1, 1);
+ = get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
@@ -3189,7 +3188,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
struct stripe_head *sh2
- = get_active_stripe(conf, sh->sector, 1, 1);
+ = get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
@@ -3284,11 +3283,11 @@ static void activate_bit_delay(raid5_conf_t *conf)
static void unplug_slaves(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int i;
rcu_read_lock();
- for (i=0; i<mddev->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3308,7 +3307,7 @@ static void unplug_slaves(mddev_t *mddev)
static void raid5_unplug_device(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
@@ -3327,7 +3326,7 @@ static void raid5_unplug_device(struct request_queue *q)
static int raid5_congested(void *data, int bits)
{
mddev_t *mddev = data;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
@@ -3352,14 +3351,14 @@ static int raid5_mergeable_bvec(struct request_queue *q,
mddev_t *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
- if (mddev->new_chunk < mddev->chunk_size)
- chunk_sectors = mddev->new_chunk >> 9;
+ if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+ chunk_sectors = mddev->new_chunk_sectors;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0;
if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3372,11 +3371,11 @@ static int raid5_mergeable_bvec(struct request_queue *q,
static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
{
sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bio->bi_size >> 9;
- if (mddev->new_chunk < mddev->chunk_size)
- chunk_sectors = mddev->new_chunk >> 9;
+ if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+ chunk_sectors = mddev->new_chunk_sectors;
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
}
@@ -3440,7 +3439,7 @@ static void raid5_align_endio(struct bio *bi, int error)
bio_put(bi);
mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
- conf = mddev_to_conf(mddev);
+ conf = mddev->private;
rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL;
@@ -3463,10 +3462,10 @@ static int bio_fits_rdev(struct bio *bi)
{
struct request_queue *q = bdev_get_queue(bi->bi_bdev);
- if ((bi->bi_size>>9) > q->max_sectors)
+ if ((bi->bi_size>>9) > queue_max_sectors(q))
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments)
+ if (bi->bi_phys_segments > queue_max_phys_segments(q))
return 0;
if (q->merge_bvec_fn)
@@ -3482,7 +3481,7 @@ static int bio_fits_rdev(struct bio *bi)
static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned int dd_idx;
struct bio* align_bi;
mdk_rdev_t *rdev;
@@ -3599,7 +3598,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
static int make_request(struct request_queue *q, struct bio * bi)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int dd_idx;
sector_t new_sector;
sector_t logical_sector, last_sector;
@@ -3675,7 +3674,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
(unsigned long long)logical_sector);
sh = get_active_stripe(conf, new_sector, previous,
- (bi->bi_rw&RWA_MASK));
+ (bi->bi_rw&RWA_MASK), 0);
if (sh) {
if (unlikely(previous)) {
/* expansion might have moved on while waiting for a
@@ -3696,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
spin_unlock_irq(&conf->device_lock);
if (must_retry) {
release_stripe(sh);
+ schedule();
goto retry;
}
}
@@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
* If old and new chunk sizes differ, we need to process the
* largest of these
*/
- if (mddev->new_chunk > mddev->chunk_size)
- reshape_sectors = mddev->new_chunk / 512;
+ if (mddev->new_chunk_sectors > mddev->chunk_sectors)
+ reshape_sectors = mddev->new_chunk_sectors;
else
- reshape_sectors = mddev->chunk_size / 512;
+ reshape_sectors = mddev->chunk_sectors;
/* we update the metadata when there is more than 3Meg
* in the block range (that is rather arbitrary, should
@@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->delta_disks < 0) {
- writepos -= min(reshape_sectors, writepos);
+ writepos -= min_t(sector_t, reshape_sectors, writepos);
readpos += reshape_sectors;
safepos += reshape_sectors;
} else {
writepos += reshape_sectors;
- readpos -= min(reshape_sectors, readpos);
- safepos -= min(reshape_sectors, safepos);
+ readpos -= min_t(sector_t, reshape_sectors, readpos);
+ safepos -= min_t(sector_t, reshape_sectors, safepos);
}
/* 'writepos' is the most advanced device address we might write.
@@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
int skipped = 0;
- sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+ sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
/* If any of this stripe is beyond the end of the old
@@ -3916,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
raid5_compute_sector(conf, stripe_addr*(new_data_disks),
1, &dd_idx, NULL);
last_sector =
- raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
- *(new_data_disks) - 1),
+ raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
+ * new_data_disks - 1),
1, &dd_idx, NULL);
if (last_sector >= mddev->dev_sectors)
last_sector = mddev->dev_sectors - 1;
while (first_sector <= last_sector) {
- sh = get_active_stripe(conf, first_sector, 1, 0);
+ sh = get_active_stripe(conf, first_sector, 1, 0, 1);
set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->reshape_progress;
- mddev->curr_resync_completed = mddev->curr_resync;
+ mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors;
conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
@@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
- sh = get_active_stripe(conf, sector_nr, 0, 1);
+ sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
if (sh == NULL) {
- sh = get_active_stripe(conf, sector_nr, 0, 0);
+ sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
/* make sure we don't swamp the stripe cache if someone else
* is trying to get access
*/
@@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
* We don't need to check the 'failed' flag as when that gets set,
* recovery aborts.
*/
- for (i=0; i<mddev->raid_disks; i++)
+ for (i = 0; i < conf->raid_disks; i++)
if (conf->disks[i].rdev == NULL)
still_degraded = 1;
@@ -4086,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
/* already done this stripe */
continue;
- sh = get_active_stripe(conf, sector, 0, 1);
+ sh = get_active_stripe(conf, sector, 0, 1, 0);
if (!sh) {
/* failed to get a stripe - must wait */
@@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
static void raid5d(mddev_t *mddev)
{
struct stripe_head *sh;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int handled;
pr_debug("+++ raid5d active\n");
@@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev)
static ssize_t
raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->max_nr_stripes);
else
@@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
static ssize_t
raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long new;
int err;
@@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
static ssize_t
raid5_show_preread_threshold(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->bypass_threshold);
else
@@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page)
static ssize_t
raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long new;
if (len >= PAGE_SIZE)
return -EINVAL;
@@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
static ssize_t
stripe_cache_active_show(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
else
@@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = {
static sector_t
raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (!sectors)
sectors = mddev->dev_sectors;
@@ -4303,8 +4303,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
raid_disks = conf->previous_raid_disks;
}
- sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
- sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+ sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+ sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
return sectors * (raid_disks - conf->max_degraded);
}
@@ -4336,9 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
return ERR_PTR(-EINVAL);
}
- if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+ if (!mddev->new_chunk_sectors ||
+ (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
+ !is_power_of_2(mddev->new_chunk_sectors)) {
printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
- mddev->new_chunk, mdname(mddev));
+ mddev->new_chunk_sectors << 9, mdname(mddev));
return ERR_PTR(-EINVAL);
}
@@ -4401,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
conf->fullsync = 1;
}
- conf->chunk_size = mddev->new_chunk;
+ conf->chunk_sectors = mddev->new_chunk_sectors;
conf->level = mddev->new_level;
if (conf->level == 6)
conf->max_degraded = 2;
@@ -4411,7 +4413,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
conf->max_nr_stripes = NR_STRIPES;
conf->reshape_progress = mddev->reshape_position;
if (conf->reshape_progress != MaxSector) {
- conf->prev_chunk = mddev->chunk_size;
+ conf->prev_chunk_sectors = mddev->chunk_sectors;
conf->prev_algo = mddev->layout;
}
@@ -4453,6 +4455,10 @@ static int run(mddev_t *mddev)
int working_disks = 0;
mdk_rdev_t *rdev;
+ if (mddev->recovery_cp != MaxSector)
+ printk(KERN_NOTICE "raid5: %s is not clean"
+ " -- starting background reconstruction\n",
+ mdname(mddev));
if (mddev->reshape_position != MaxSector) {
/* Check that we can continue the reshape.
* Currently only disks can change, it must
@@ -4475,7 +4481,7 @@ static int run(mddev_t *mddev)
* geometry.
*/
here_new = mddev->reshape_position;
- if (sector_div(here_new, (mddev->new_chunk>>9)*
+ if (sector_div(here_new, mddev->new_chunk_sectors *
(mddev->raid_disks - max_degraded))) {
printk(KERN_ERR "raid5: reshape_position not "
"on a stripe boundary\n");
@@ -4483,7 +4489,7 @@ static int run(mddev_t *mddev)
}
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
- sector_div(here_old, (mddev->chunk_size>>9)*
+ sector_div(here_old, mddev->chunk_sectors *
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
@@ -4498,7 +4504,7 @@ static int run(mddev_t *mddev)
} else {
BUG_ON(mddev->level != mddev->new_level);
BUG_ON(mddev->layout != mddev->new_layout);
- BUG_ON(mddev->chunk_size != mddev->new_chunk);
+ BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
BUG_ON(mddev->delta_disks != 0);
}
@@ -4532,7 +4538,7 @@ static int run(mddev_t *mddev)
}
/* device size must be a multiple of chunk size */
- mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+ mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
mddev->resync_max_sectors = mddev->dev_sectors;
if (mddev->degraded > 0 &&
@@ -4581,7 +4587,7 @@ static int run(mddev_t *mddev)
{
int data_disks = conf->previous_raid_disks - conf->max_degraded;
int stripe = data_disks *
- (mddev->chunk_size / PAGE_SIZE);
+ ((mddev->chunk_sectors << 9) / PAGE_SIZE);
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
}
@@ -4678,7 +4684,8 @@ static void status(struct seq_file *seq, mddev_t *mddev)
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
int i;
- seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
+ seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
+ mddev->chunk_sectors / 2, mddev->layout);
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
for (i = 0; i < conf->raid_disks; i++)
seq_printf (seq, "%s",
@@ -4826,7 +4833,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
- sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+ sectors &= ~((sector_t)mddev->chunk_sectors - 1);
md_set_array_sectors(mddev, raid5_size(mddev, sectors,
mddev->raid_disks));
if (mddev->array_sectors >
@@ -4843,14 +4850,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
return 0;
}
-static int raid5_check_reshape(mddev_t *mddev)
+static int check_stripe_cache(mddev_t *mddev)
+{
+ /* Can only proceed if there are plenty of stripe_heads.
+ * We need a minimum of one full stripe,, and for sensible progress
+ * it is best to have about 4 times that.
+ * If we require 4 times, then the default 256 4K stripe_heads will
+ * allow for chunk sizes up to 256K, which is probably OK.
+ * If the chunk size is greater, user-space should request more
+ * stripe_heads first.
+ */
+ raid5_conf_t *conf = mddev->private;
+ if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+ > conf->max_nr_stripes ||
+ ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+ > conf->max_nr_stripes) {
+ printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
+ ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
+ / STRIPE_SIZE)*4);
+ return 0;
+ }
+ return 1;
+}
+
+static int check_reshape(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (mddev->delta_disks == 0 &&
mddev->new_layout == mddev->layout &&
- mddev->new_chunk == mddev->chunk_size)
- return -EINVAL; /* nothing to do */
+ mddev->new_chunk_sectors == mddev->chunk_sectors)
+ return 0; /* nothing to do */
if (mddev->bitmap)
/* Cannot grow a bitmap yet */
return -EBUSY;
@@ -4869,28 +4899,15 @@ static int raid5_check_reshape(mddev_t *mddev)
return -EINVAL;
}
- /* Can only proceed if there are plenty of stripe_heads.
- * We need a minimum of one full stripe,, and for sensible progress
- * it is best to have about 4 times that.
- * If we require 4 times, then the default 256 4K stripe_heads will
- * allow for chunk sizes up to 256K, which is probably OK.
- * If the chunk size is greater, user-space should request more
- * stripe_heads first.
- */
- if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
- (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
- printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
- (max(mddev->chunk_size, mddev->new_chunk)
- / STRIPE_SIZE)*4);
+ if (!check_stripe_cache(mddev))
return -ENOSPC;
- }
return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
}
static int raid5_start_reshape(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
mdk_rdev_t *rdev;
int spares = 0;
int added_devices = 0;
@@ -4899,6 +4916,9 @@ static int raid5_start_reshape(mddev_t *mddev)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
+ if (!check_stripe_cache(mddev))
+ return -ENOSPC;
+
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags))
@@ -4925,8 +4945,8 @@ static int raid5_start_reshape(mddev_t *mddev)
spin_lock_irq(&conf->device_lock);
conf->previous_raid_disks = conf->raid_disks;
conf->raid_disks += mddev->delta_disks;
- conf->prev_chunk = conf->chunk_size;
- conf->chunk_size = mddev->new_chunk;
+ conf->prev_chunk_sectors = conf->chunk_sectors;
+ conf->chunk_sectors = mddev->new_chunk_sectors;
conf->prev_algo = conf->algorithm;
conf->algorithm = mddev->new_layout;
if (mddev->delta_disks < 0)
@@ -5008,7 +5028,7 @@ static void end_reshape(raid5_conf_t *conf)
*/
{
int data_disks = conf->raid_disks - conf->max_degraded;
- int stripe = data_disks * (conf->chunk_size
+ int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);
if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
@@ -5022,7 +5042,7 @@ static void end_reshape(raid5_conf_t *conf)
static void raid5_finish_reshape(mddev_t *mddev)
{
struct block_device *bdev;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5053,7 +5073,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
raid5_remove_disk(mddev, d);
}
mddev->layout = conf->algorithm;
- mddev->chunk_size = conf->chunk_size;
+ mddev->chunk_sectors = conf->chunk_sectors;
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
}
@@ -5061,7 +5081,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
static void raid5_quiesce(mddev_t *mddev, int state)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
switch(state) {
case 2: /* resume for a suspend */
@@ -5111,7 +5131,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev)
mddev->new_level = 5;
mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
- mddev->new_chunk = chunksect << 9;
+ mddev->new_chunk_sectors = chunksect;
return setup_conf(mddev);
}
@@ -5150,24 +5170,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev)
}
-static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+static int raid5_check_reshape(mddev_t *mddev)
{
/* For a 2-drive array, the layout and chunk size can be changed
* immediately as not restriping is needed.
* For larger arrays we record the new value - after validation
* to be used by a reshape pass.
*/
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
+ int new_chunk = mddev->new_chunk_sectors;
- if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+ if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
return -EINVAL;
if (new_chunk > 0) {
- if (new_chunk & (new_chunk-1))
- /* not a power of 2 */
+ if (!is_power_of_2(new_chunk))
return -EINVAL;
- if (new_chunk < PAGE_SIZE)
+ if (new_chunk < (PAGE_SIZE>>9))
return -EINVAL;
- if (mddev->array_sectors & ((new_chunk>>9)-1))
+ if (mddev->array_sectors & (new_chunk-1))
/* not factor of array size */
return -EINVAL;
}
@@ -5175,49 +5195,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
/* They look valid */
if (mddev->raid_disks == 2) {
-
- if (new_layout >= 0) {
- conf->algorithm = new_layout;
- mddev->layout = mddev->new_layout = new_layout;
+ /* can make the change immediately */
+ if (mddev->new_layout >= 0) {
+ conf->algorithm = mddev->new_layout;
+ mddev->layout = mddev->new_layout;
}
if (new_chunk > 0) {
- conf->chunk_size = new_chunk;
- mddev->chunk_size = mddev->new_chunk = new_chunk;
+ conf->chunk_sectors = new_chunk ;
+ mddev->chunk_sectors = new_chunk;
}
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
- } else {
- if (new_layout >= 0)
- mddev->new_layout = new_layout;
- if (new_chunk > 0)
- mddev->new_chunk = new_chunk;
}
- return 0;
+ return check_reshape(mddev);
}
-static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+static int raid6_check_reshape(mddev_t *mddev)
{
- if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+ int new_chunk = mddev->new_chunk_sectors;
+
+ if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
return -EINVAL;
if (new_chunk > 0) {
- if (new_chunk & (new_chunk-1))
- /* not a power of 2 */
+ if (!is_power_of_2(new_chunk))
return -EINVAL;
- if (new_chunk < PAGE_SIZE)
+ if (new_chunk < (PAGE_SIZE >> 9))
return -EINVAL;
- if (mddev->array_sectors & ((new_chunk>>9)-1))
+ if (mddev->array_sectors & (new_chunk-1))
/* not factor of array size */
return -EINVAL;
}
/* They look valid */
-
- if (new_layout >= 0)
- mddev->new_layout = new_layout;
- if (new_chunk > 0)
- mddev->new_chunk = new_chunk;
-
- return 0;
+ return check_reshape(mddev);
}
static void *raid5_takeover(mddev_t *mddev)
@@ -5227,8 +5237,6 @@ static void *raid5_takeover(mddev_t *mddev)
* raid1 - if there are two drives. We need to know the chunk size
* raid4 - trivial - just use a raid4 layout.
* raid6 - Providing it is a *_6 layout
- *
- * For now, just do raid1
*/
if (mddev->level == 1)
@@ -5310,12 +5318,11 @@ static struct mdk_personality raid6_personality =
.sync_request = sync_request,
.resize = raid5_resize,
.size = raid5_size,
- .check_reshape = raid5_check_reshape,
+ .check_reshape = raid6_check_reshape,
.start_reshape = raid5_start_reshape,
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
- .reconfig = raid6_reconfig,
};
static struct mdk_personality raid5_personality =
{
@@ -5338,7 +5345,6 @@ static struct mdk_personality raid5_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
- .reconfig = raid5_reconfig,
};
static struct mdk_personality raid4_personality =