From dafb20fa34320a472deb7442f25a0c086e0feb33 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Mar 2012 12:46:39 +1100 Subject: md: tidy up rdev_for_each usage. md.h has an 'rdev_for_each()' macro for iterating the rdevs in an mddev. However it uses the 'safe' version of list_for_each_entry, and so requires the extra variable, but doesn't include 'safe' in the name, which is useful documentation. Consequently some places use this safe version without needing it, and many use an explicity list_for_each entry. So: - rename rdev_for_each to rdev_for_each_safe - create a new rdev_for_each which uses the plain list_for_each_entry, - use the 'safe' version only where needed, and convert all other list_for_each_entry calls to use rdev_for_each. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7294bd115e34..7ef5cbf31bb1 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -91,7 +91,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) if (!conf) return -ENOMEM; - list_for_each_entry(rdev1, &mddev->disks, same_set) { + rdev_for_each(rdev1, mddev) { pr_debug("md/raid0:%s: looking at %s\n", mdname(mddev), bdevname(rdev1->bdev, b)); @@ -102,7 +102,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) sector_div(sectors, mddev->chunk_sectors); rdev1->sectors = sectors * mddev->chunk_sectors; - list_for_each_entry(rdev2, &mddev->disks, same_set) { + rdev_for_each(rdev2, mddev) { pr_debug("md/raid0:%s: comparing %s(%llu)" " with %s(%llu)\n", mdname(mddev), @@ -157,7 +157,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) smallest = NULL; dev = conf->devlist; err = -EINVAL; - list_for_each_entry(rdev1, &mddev->disks, same_set) { + rdev_for_each(rdev1, mddev) { int j = rdev1->raid_disk; if (mddev->level == 10) { @@ -329,7 +329,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) array_sectors += rdev->sectors; return array_sectors; @@ -543,7 +543,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) return ERR_PTR(-EINVAL); } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { /* check slot number for a disk */ if (rdev->raid_disk == mddev->raid_disks-1) { printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", -- cgit v1.2.3 From ba13da47ffa202784355561f72160a41350e95cc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Mar 2012 12:46:39 +1100 Subject: md: add proper merge_bvec handling to RAID0 and Linear. These personalities currently set a max request size of one page when any member device has a merge_bvec_fn because they don't bother to call that function. This causes extra works in splitting and combining requests. So make the extra effort to call the merge_bvec_fn when it exists so that we end up with larger requests out the bottom. Signed-off-by: NeilBrown --- drivers/md/linear.c | 30 +++++----- drivers/md/raid0.c | 154 +++++++++++++++++++++++++++++----------------------- drivers/md/raid0.h | 11 ++-- 3 files changed, 107 insertions(+), 88 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 67940741b19d..b0fcc7d02adb 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q, struct dev_info *dev0; unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + int maxbytes = biovec->bv_len; + struct request_queue *subq; rcu_read_lock(); dev0 = which_dev(mddev, sector); maxsectors = dev0->end_sector - sector; + subq = bdev_get_queue(dev0->rdev->bdev); + if (subq->merge_bvec_fn) { + bvm->bi_bdev = dev0->rdev->bdev; + bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; + maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, + biovec)); + } rcu_read_unlock(); if (maxsectors < bio_sectors) @@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q, maxsectors -= bio_sectors; if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) - return biovec->bv_len; - /* The bytes available at this offset could be really big, - * so we cap at 2^31 to avoid overflow */ - if (maxsectors > (1 << (31-9))) - return 1<<31; - return maxsectors << 9; + return maxbytes; + + if (maxsectors > (maxbytes >> 9)) + return maxbytes; + else + return maxsectors << 9; } static int linear_congested(void *data, int bits) @@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit max_segments to 1 lying within - * a single page. - */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } conf->array_sectors += rdev->sectors; cnt++; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7ef5cbf31bb1..6f31f5596e01 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) disk_stack_limits(mddev->gendisk, rdev1->bdev, rdev1->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_segments to 1, lying within - * a single page. - */ - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } + if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) + conf->has_merge_bvec = 1; + if (!smallest || (rdev1->sectors < smallest->sectors)) smallest = rdev1; cnt++; @@ -290,8 +284,64 @@ abort: return err; } +/* Find the zone which holds a particular offset + * Update *sectorp to be an offset in that zone + */ +static struct strip_zone *find_zone(struct r0conf *conf, + sector_t *sectorp) +{ + int i; + struct strip_zone *z = conf->strip_zone; + sector_t sector = *sectorp; + + for (i = 0; i < conf->nr_strip_zones; i++) + if (sector < z[i].zone_end) { + if (i) + *sectorp = sector - z[i-1].zone_end; + return z + i; + } + BUG(); +} + +/* + * remaps the bio to the target device. we separate two flows. + * power 2 flow and a general flow for the sake of perfromance +*/ +static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, + sector_t sector, sector_t *sector_offset) +{ + unsigned int sect_in_chunk; + sector_t chunk; + struct r0conf *conf = mddev->private; + int raid_disks = conf->strip_zone[0].nb_dev; + unsigned int chunk_sects = mddev->chunk_sectors; + + if (is_power_of_2(chunk_sects)) { + int chunksect_bits = ffz(~chunk_sects); + /* find the sector offset inside the chunk */ + sect_in_chunk = sector & (chunk_sects - 1); + sector >>= chunksect_bits; + /* chunk in zone */ + chunk = *sector_offset; + /* quotient is the chunk in real device*/ + sector_div(chunk, zone->nb_dev << chunksect_bits); + } else{ + sect_in_chunk = sector_div(sector, chunk_sects); + chunk = *sector_offset; + sector_div(chunk, chunk_sects * zone->nb_dev); + } + /* + * position the bio over the real device + * real sector = chunk in device + starting of zone + * + the position in the chunk + */ + *sector_offset = (chunk * chunk_sects) + sect_in_chunk; + return conf->devlist[(zone - conf->strip_zone)*raid_disks + + sector_div(sector, zone->nb_dev)]; +} + /** - * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged + * raid0_mergeable_bvec -- tell bio layer if two requests can be merged * @q: request queue * @bvm: properties of new bio * @biovec: the request that could be merged to it. @@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, struct bio_vec *biovec) { struct mddev *mddev = q->queuedata; + struct r0conf *conf = mddev->private; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + sector_t sector_offset = sector; int max; unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; + struct strip_zone *zone; + struct md_rdev *rdev; + struct request_queue *subq; if (is_power_of_2(chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) @@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q, else max = (chunk_sectors - (sector_div(sector, chunk_sectors) + bio_sectors)) << 9; - if (max < 0) max = 0; /* bio_add cannot handle a negative return */ + if (max < 0) + max = 0; /* bio_add cannot handle a negative return */ if (max <= biovec->bv_len && bio_sectors == 0) return biovec->bv_len; - else + if (max < biovec->bv_len) + /* too small already, no need to check further */ + return max; + if (!conf->has_merge_bvec) + return max; + + /* May need to check subordinate device */ + sector = sector_offset; + zone = find_zone(mddev->private, §or_offset); + rdev = map_sector(mddev, zone, sector, §or_offset); + subq = bdev_get_queue(rdev->bdev); + if (subq->merge_bvec_fn) { + bvm->bi_bdev = rdev->bdev; + bvm->bi_sector = sector_offset + zone->dev_start + + rdev->data_offset; + return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); + } else return max; } @@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev) return 0; } -/* Find the zone which holds a particular offset - * Update *sectorp to be an offset in that zone - */ -static struct strip_zone *find_zone(struct r0conf *conf, - sector_t *sectorp) -{ - int i; - struct strip_zone *z = conf->strip_zone; - sector_t sector = *sectorp; - - for (i = 0; i < conf->nr_strip_zones; i++) - if (sector < z[i].zone_end) { - if (i) - *sectorp = sector - z[i-1].zone_end; - return z + i; - } - BUG(); -} - -/* - * remaps the bio to the target device. we separate two flows. - * power 2 flow and a general flow for the sake of perfromance -*/ -static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, - sector_t sector, sector_t *sector_offset) -{ - unsigned int sect_in_chunk; - sector_t chunk; - struct r0conf *conf = mddev->private; - int raid_disks = conf->strip_zone[0].nb_dev; - unsigned int chunk_sects = mddev->chunk_sectors; - - if (is_power_of_2(chunk_sects)) { - int chunksect_bits = ffz(~chunk_sects); - /* find the sector offset inside the chunk */ - sect_in_chunk = sector & (chunk_sects - 1); - sector >>= chunksect_bits; - /* chunk in zone */ - chunk = *sector_offset; - /* quotient is the chunk in real device*/ - sector_div(chunk, zone->nb_dev << chunksect_bits); - } else{ - sect_in_chunk = sector_div(sector, chunk_sects); - chunk = *sector_offset; - sector_div(chunk, chunk_sects * zone->nb_dev); - } - /* - * position the bio over the real device - * real sector = chunk in device + starting of zone - * + the position in the chunk - */ - *sector_offset = (chunk * chunk_sects) + sect_in_chunk; - return conf->devlist[(zone - conf->strip_zone)*raid_disks - + sector_div(sector, zone->nb_dev)]; -} - /* * Is io distribute over 1 or more chunks ? */ @@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) } sector_offset = bio->bi_sector; - zone = find_zone(mddev->private, §or_offset); + zone = find_zone(mddev->private, §or_offset); tmp_dev = map_sector(mddev, zone, bio->bi_sector, §or_offset); bio->bi_bdev = tmp_dev->bdev; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 0884bba8df4c..05539d9c97f0 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -4,13 +4,16 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - int nb_dev; /* # of devices attached to the zone */ + int nb_dev; /* # of devices attached to the zone */ }; struct r0conf { - struct strip_zone *strip_zone; - struct md_rdev **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ - int nr_strip_zones; + struct strip_zone *strip_zone; + struct md_rdev **devlist; /* lists of rdevs, pointed to + * by strip_zone->dev */ + int nr_strip_zones; + int has_merge_bvec; /* at least one member has + * a merge_bvec_fn */ }; #endif -- cgit v1.2.3 From 0366ef847581d692e197b88825867ca9ee00e358 Mon Sep 17 00:00:00 2001 From: majianpeng Date: Mon, 2 Apr 2012 09:48:37 +1000 Subject: md/raid0: If md_integrity_register() fails, raid0_run() must free the mem. Signed-off-by: majianpeng Signed-off-by: NeilBrown --- drivers/md/raid0.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6f31f5596e01..c9809453a346 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks return array_sectors; } +static int raid0_stop(struct mddev *mddev); + static int raid0_run(struct mddev *mddev) { struct r0conf *conf; @@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev) blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); dump_zones(mddev); - return md_integrity_register(mddev); + + ret = md_integrity_register(mddev); + if (ret) + raid0_stop(mddev); + + return ret; } static int raid0_stop(struct mddev *mddev) -- cgit v1.2.3 From 24b961f811a3e790a9b93604d2594bfb6cce4fa4 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Sun, 1 Apr 2012 23:48:38 +1000 Subject: md: Avoid OOPS when reshaping raid1 to raid0 raid1 arrays do not have the notion of chunk size. Calculate the largest chunk sector size we can use to avoid a divide by zero OOPS when aligning the size of the new array to the chunk size. Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown --- drivers/md/raid0.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c9809453a346..de63a1fc3737 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -632,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev) static void *raid0_takeover_raid1(struct mddev *mddev) { struct r0conf *priv_conf; + int chunksect; /* Check layout: * - (N - 1) mirror drives must be already faulty @@ -642,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev) return ERR_PTR(-EINVAL); } + /* + * a raid1 doesn't have the notion of chunk size, so + * figure out the largest suitable size we can use. + */ + chunksect = 64 * 2; /* 64K by default */ + + /* The array must be an exact multiple of chunksize */ + while (chunksect && (mddev->array_sectors & (chunksect - 1))) + chunksect >>= 1; + + if ((chunksect << 9) < PAGE_SIZE) + /* array size does not allow a suitable chunk size */ + return ERR_PTR(-EINVAL); + /* Set new parameters */ mddev->new_level = 0; mddev->new_layout = 0; - mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ + mddev->new_chunk_sectors = chunksect; + mddev->chunk_sectors = chunksect; mddev->delta_disks = 1 - mddev->raid_disks; mddev->raid_disks = 1; /* make sure it will be not marked as dirty */ -- cgit v1.2.3