diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 174 |
1 files changed, 83 insertions, 91 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 58f8395b56d6..f2e3c3e2d879 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1295,10 +1295,7 @@ again: if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - if (conf->mddev->gendisk) - trace_block_bio_remap(bi, - disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + mddev_trace_remap(conf->mddev, bi, sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, bi); else @@ -1342,10 +1339,7 @@ again: */ if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; - if (conf->mddev->gendisk) - trace_block_bio_remap(rbi, - disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + mddev_trace_remap(conf->mddev, rbi, sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, rbi); else @@ -2422,12 +2416,12 @@ static int grow_stripes(struct r5conf *conf, int num) size_t namelen = sizeof(conf->cache_name[0]); int devs = max(conf->raid_disks, conf->previous_raid_disks); - if (conf->mddev->gendisk) + if (mddev_is_dm(conf->mddev)) snprintf(conf->cache_name[0], namelen, - "raid%d-%s", conf->level, mdname(conf->mddev)); + "raid%d-%p", conf->level, conf->mddev); else snprintf(conf->cache_name[0], namelen, - "raid%d-%p", conf->level, conf->mddev); + "raid%d-%s", conf->level, mdname(conf->mddev)); snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); conf->active_name = 0; @@ -4201,10 +4195,9 @@ static int handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_HANDLE, &sh->state); if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { /* prefer read-modify-write, but need to get some data */ - if (conf->mddev->queue) - blk_add_trace_msg(conf->mddev->queue, - "raid5 rmw %llu %d", - (unsigned long long)sh->sector, rmw); + mddev_add_trace_msg(conf->mddev, "raid5 rmw %llu %d", + sh->sector, rmw); + for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (test_bit(R5_InJournal, &dev->flags) && @@ -4281,10 +4274,11 @@ static int handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_DELAYED, &sh->state); } } - if (rcw && conf->mddev->queue) - blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", - (unsigned long long)sh->sector, - rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); + if (rcw && !mddev_is_dm(conf->mddev)) + blk_add_trace_msg(conf->mddev->gendisk->queue, + "raid5 rcw %llu %d %d %d", + (unsigned long long)sh->sector, rcw, qread, + test_bit(STRIPE_DELAYED, &sh->state)); } if (rcw > disks && rmw > disks && @@ -5523,9 +5517,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); } - if (mddev->gendisk) - trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), - raid_bio->bi_iter.bi_sector); + mddev_trace_remap(mddev, align_bio, raid_bio->bi_iter.bi_sector); submit_bio_noacct(align_bio); return 1; } @@ -5694,8 +5686,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) } release_inactive_stripe_list(conf, cb->temp_inactive_list, NR_STRIPE_HASH_LOCKS); - if (mddev->queue) - trace_block_unplug(mddev->queue, cnt, !from_schedule); + if (!mddev_is_dm(mddev)) + trace_block_unplug(mddev->gendisk->queue, cnt, !from_schedule); kfree(cb); } @@ -7098,7 +7090,7 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { - struct request_queue *q = mddev->queue; + struct request_queue *q = mddev->gendisk->queue; conf->skip_copy = new; if (new) @@ -7700,10 +7692,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded return 0; } -static void raid5_set_io_opt(struct r5conf *conf) +static int raid5_set_limits(struct mddev *mddev) { - blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * - (conf->raid_disks - conf->max_degraded)); + struct r5conf *conf = mddev->private; + struct queue_limits lim; + int data_disks, stripe; + struct md_rdev *rdev; + + /* + * The read-ahead size must cover two whole stripes, which is + * 2 * (datadisks) * chunksize where 'n' is the number of raid devices. + */ + data_disks = conf->previous_raid_disks - conf->max_degraded; + + /* + * We can only discard a whole stripe. It doesn't make sense to + * discard data disk but write parity disk + */ + stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9)); + + blk_set_stacking_limits(&lim); + lim.io_min = mddev->chunk_sectors << 9; + lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded); + lim.raid_partial_stripes_expensive = 1; + lim.discard_granularity = stripe; + lim.max_write_zeroes_sectors = 0; + mddev_stack_rdev_limits(mddev, &lim); + rdev_for_each(rdev, mddev) + queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset, + mddev->gendisk->disk_name); + + /* + * Zeroing is required for discard, otherwise data could be lost. + * + * Consider a scenario: discard a stripe (the stripe could be + * inconsistent if discard_zeroes_data is 0); write one disk of the + * stripe (the stripe could be inconsistent again depending on which + * disks are used to calculate parity); the disk is broken; The stripe + * data of this disk is lost. + * + * We only allow DISCARD if the sysadmin has confirmed that only safe + * devices are in use by setting a module parameter. A better idea + * might be to turn DISCARD into WRITE_ZEROES requests, as that is + * required to be safe. + */ + if (!devices_handle_discard_safely || + lim.max_discard_sectors < (stripe >> 9) || + lim.discard_granularity < stripe) + lim.max_hw_discard_sectors = 0; + + /* + * Requests require having a bitmap for each stripe. + * Limit the max sectors based on this. + */ + lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf); + + /* No restrictions on the number of segments in the request */ + lim.max_segments = USHRT_MAX; + + return queue_limits_set(mddev->gendisk->queue, &lim); } static int raid5_run(struct mddev *mddev) @@ -7716,6 +7763,7 @@ static int raid5_run(struct mddev *mddev) int i; long long min_offset_diff = 0; int first = 1; + int ret = -EIO; if (mddev->recovery_cp != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", @@ -7968,66 +8016,10 @@ static int raid5_run(struct mddev *mddev) mdname(mddev)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - if (mddev->queue) { - int chunk_size; - /* read-ahead size must cover two whole stripes, which - * is 2 * (datadisks) * chunksize where 'n' is the - * number of raid devices - */ - int data_disks = conf->previous_raid_disks - conf->max_degraded; - int stripe = data_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - - chunk_size = mddev->chunk_sectors << 9; - blk_queue_io_min(mddev->queue, chunk_size); - raid5_set_io_opt(conf); - mddev->queue->limits.raid_partial_stripes_expensive = 1; - /* - * We can only discard a whole stripe. It doesn't make sense to - * discard data disk but write parity disk - */ - stripe = stripe * PAGE_SIZE; - stripe = roundup_pow_of_two(stripe); - mddev->queue->limits.discard_granularity = stripe; - - blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->new_data_offset << 9); - } - - /* - * zeroing is required, otherwise data - * could be lost. Consider a scenario: discard a stripe - * (the stripe could be inconsistent if - * discard_zeroes_data is 0); write one disk of the - * stripe (the stripe could be inconsistent again - * depending on which disks are used to calculate - * parity); the disk is broken; The stripe data of this - * disk is lost. - * - * We only allow DISCARD if the sysadmin has confirmed that - * only safe devices are in use by setting a module parameter. - * A better idea might be to turn DISCARD into WRITE_ZEROES - * requests, as that is required to be safe. - */ - if (!devices_handle_discard_safely || - mddev->queue->limits.max_discard_sectors < (stripe >> 9) || - mddev->queue->limits.discard_granularity < stripe) - blk_queue_max_discard_sectors(mddev->queue, 0); - - /* - * Requests require having a bitmap for each stripe. - * Limit the max sectors based on this. - */ - blk_queue_max_hw_sectors(mddev->queue, - RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf)); - - /* No restrictions on the number of segments in the request */ - blk_queue_max_segments(mddev->queue, USHRT_MAX); + if (!mddev_is_dm(mddev)) { + ret = raid5_set_limits(mddev); + if (ret) + goto abort; } if (log_init(conf, journal_dev, raid5_has_ppl(conf))) @@ -8040,7 +8032,7 @@ abort: free_conf(conf); mddev->private = NULL; pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); - return -EIO; + return ret; } static void raid5_free(struct mddev *mddev, void *priv) @@ -8572,8 +8564,8 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - if (conf->mddev->queue) - raid5_set_io_opt(conf); + mddev_update_io_opt(conf->mddev, + conf->raid_disks - conf->max_degraded); } } |