Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/request.c   |   4
-rw-r--r--  drivers/md/bcache/super.c     |  29
-rw-r--r--  drivers/md/dm-cache-target.c  |   4
-rw-r--r--  drivers/md/dm-core.h          |   7
-rw-r--r--  drivers/md/dm-integrity.c     |   4
-rw-r--r--  drivers/md/dm-raid.c          |  12
-rw-r--r--  drivers/md/dm-rq.c            |   2
-rw-r--r--  drivers/md/dm-table.c         |  20
-rw-r--r--  drivers/md/dm-writecache.c    |   6
-rw-r--r--  drivers/md/dm.c               |  82
-rw-r--r--  drivers/md/md-cluster.c       |   8
-rw-r--r--  drivers/md/md-linear.c        |   6
-rw-r--r--  drivers/md/md.c               |  52
-rw-r--r--  drivers/md/md.h               |   2
-rw-r--r--  drivers/md/raid0.c            |  18
-rw-r--r--  drivers/md/raid1.c            |   7
-rw-r--r--  drivers/md/raid10.c           | 429
-rw-r--r--  drivers/md/raid10.h           |   1
-rw-r--r--  drivers/md/raid5.c            |  15
19 files changed, 163 insertions(+), 545 deletions(-)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index afac8d07c1bd..85b1f2a9b72d 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -475,7 +475,7 @@ struct search {
unsigned int read_dirty_data:1;
unsigned int cache_missed:1;
- struct hd_struct *part;
+ struct block_device *part;
unsigned long start_time;
struct btree_op op;
@@ -1073,7 +1073,7 @@ struct detached_dev_io_private {
unsigned long start_time;
bio_end_io_t *bi_end_io;
void *bi_private;
- struct hd_struct *part;
+ struct block_device *part;
};
static void detached_dev_end_io(struct bio *bio)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index b1a6ba9a5adb..0e06d721cd8e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1399,7 +1399,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
q->limits.raid_partial_stripes_expensive;
ret = bcache_device_init(&dc->disk, block_size,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
+ bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
dc->bdev, &bcache_cached_ops);
if (ret)
return ret;
@@ -1438,8 +1438,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto err;
err = "error creating kobject";
- if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
- "bcache"))
+ if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache"))
goto err;
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
@@ -2333,9 +2332,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto err;
}
- if (kobject_add(&ca->kobj,
- &part_to_dev(bdev->bd_part)->kobj,
- "bcache")) {
+ if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) {
err = "error calling kobject_add";
ret = -ENOMEM;
goto out;
@@ -2374,38 +2371,38 @@ kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
-static bool bch_is_open_backing(struct block_device *bdev)
+static bool bch_is_open_backing(dev_t dev)
{
struct cache_set *c, *tc;
struct cached_dev *dc, *t;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
list_for_each_entry_safe(dc, t, &c->cached_devs, list)
- if (dc->bdev == bdev)
+ if (dc->bdev->bd_dev == dev)
return true;
list_for_each_entry_safe(dc, t, &uncached_devices, list)
- if (dc->bdev == bdev)
+ if (dc->bdev->bd_dev == dev)
return true;
return false;
}
-static bool bch_is_open_cache(struct block_device *bdev)
+static bool bch_is_open_cache(dev_t dev)
{
struct cache_set *c, *tc;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
struct cache *ca = c->cache;
- if (ca->bdev == bdev)
+ if (ca->bdev->bd_dev == dev)
return true;
}
return false;
}
-static bool bch_is_open(struct block_device *bdev)
+static bool bch_is_open(dev_t dev)
{
- return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
+ return bch_is_open_cache(dev) || bch_is_open_backing(dev);
}
struct async_reg_args {
@@ -2529,9 +2526,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
sb);
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
- bdev = lookup_bdev(strim(path));
+ dev_t dev;
+
mutex_lock(&bch_register_lock);
- if (!IS_ERR(bdev) && bch_is_open(bdev))
+ if (lookup_bdev(strim(path), &dev) == 0 &&
+ bch_is_open(dev))
err = "device already registered";
else
err = "device busy";
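The bcache hunks above share one migration: struct hd_struct is gone, so the per-partition handle becomes a struct block_device, sizes come from bdev_nr_sectors(), sysfs parents from bdev_kobj(), and lookup_bdev() now returns an errno and reports the device number through a dev_t output argument instead of handing back a pinned block_device. A minimal sketch of the resulting "already open?" idiom, assuming the 5.11-era prototype int lookup_bdev(const char *pathname, dev_t *dev); example_is_registered() is an illustration, not code from this patch:

#include <linux/blkdev.h>

/*
 * Sketch only: with no pinned bdev returned by lookup_bdev(), openness
 * checks compare dev_t numbers rather than block_device pointers, which
 * is why bch_is_open() above now takes a dev_t.
 */
static bool example_is_registered(const char *path, struct block_device *open_bdev)
{
	dev_t dev;

	if (lookup_bdev(path, &dev))	/* -errno on failure, no reference taken */
		return false;
	return open_bdev->bd_dev == dev;
}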
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9644424591da..4bc453f5bbaa 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -712,10 +712,6 @@ static bool block_size_is_power_of_two(struct cache *cache)
return cache->sectors_per_block_shift >= 0;
}
-/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
-#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
-__always_inline
-#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
do_div(b, n);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index d522093cb39d..086d293c2b03 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -96,19 +96,12 @@ struct mapped_device {
*/
struct workqueue_struct *wq;
- /*
- * freeze/thaw support require holding onto a super block
- */
- struct super_block *frozen_sb;
-
/* forced geometry settings */
struct hd_geometry geometry;
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
- struct block_device *bdev;
-
struct dm_stats stats;
/* for blk-mq request-based DM support */
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3fc3757def55..5a7a1b90e671 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3462,7 +3462,7 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
int r;
if (a->alg_string) {
- *hash = crypto_alloc_shash(a->alg_string, 0, 0);
+ *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
if (IS_ERR(*hash)) {
*error = error_alg;
r = PTR_ERR(*hash);
@@ -3519,7 +3519,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
struct journal_completion comp;
comp.ic = ic;
- ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0);
+ ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
if (IS_ERR(ic->journal_crypt)) {
*error = "Invalid journal cipher";
r = PTR_ERR(ic->journal_crypt);
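In crypto_alloc_shash()/crypto_alloc_skcipher(), the third argument is a mask: setting CRYPTO_ALG_ALLOCATES_MEMORY there while leaving the corresponding type bit clear restricts the lookup to implementations whose ALLOCATES_MEMORY flag is unset, presumably so dm-integrity's I/O path cannot hit per-request allocations. A hedged sketch of the convention (the algorithm name is illustrative):

	/* type = 0, mask = CRYPTO_ALG_ALLOCATES_MEMORY: accept only drivers
	 * that do not allocate memory while processing a request.
	 */
	tfm = crypto_alloc_shash("hmac(sha256)", 0, CRYPTO_ALG_ALLOCATES_MEMORY);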
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 9c1f7c4de65b..23c38777e8f6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -700,8 +700,7 @@ static void rs_set_capacity(struct raid_set *rs)
{
struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
- set_capacity(gendisk, rs->md.array_sectors);
- revalidate_disk_size(gendisk, true);
+ set_capacity_and_notify(gendisk, rs->md.array_sectors);
}
/*
@@ -3728,6 +3727,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, chunk_size_bytes);
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
+
+ /*
+ * RAID1 and RAID10 personalities require bio splitting,
+ * RAID0/4/5/6 don't and process large discard bios properly.
+ */
+ if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ limits->discard_granularity = chunk_size_bytes;
+ limits->max_discard_sectors = rs->md.chunk_sectors;
+ }
}
static void raid_postsuspend(struct dm_target *ti)
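The cap matters because raid1 and raid10 would otherwise have to split oversized discard bios themselves; with these limits the block layer splits before the bio reaches the target. A worked example with an assumed 512 KiB chunk (numbers illustrative, not from the patch):

	/* chunk_size_bytes = rs->md.chunk_sectors << 9 = 512 KiB */
	limits->discard_granularity = 524288;	/* bytes: exactly one chunk */
	limits->max_discard_sectors = 1024;	/* sectors: 524288 >> 9     */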
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 729a72ec30cc..13b4385f4d5a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio)
}
/* The target has remapped the I/O so dispatch it */
- trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
+ trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ce543b761be7..188f41287f18 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -18,7 +18,6 @@
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/atomic.h>
-#include <linux/lcm.h>
#include <linux/blk-mq.h>
#include <linux/mount.h>
#include <linux/dax.h>
@@ -348,16 +347,9 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
dev_t dm_get_dev_t(const char *path)
{
dev_t dev;
- struct block_device *bdev;
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
+ if (lookup_bdev(path, &dev))
dev = name_to_dev_t(path);
- else {
- dev = bdev->bd_dev;
- bdput(bdev);
- }
-
return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);
@@ -1247,12 +1239,6 @@ void dm_table_event_callback(struct dm_table *t,
void dm_table_event(struct dm_table *t)
{
- /*
- * You can no longer call dm_table_event() from interrupt
- * context, use a bottom half instead.
- */
- BUG_ON(in_interrupt());
-
mutex_lock(&_event_lock);
if (t->event_fn)
t->event_fn(t->event_context);
@@ -1455,10 +1441,6 @@ int dm_calculate_queue_limits(struct dm_table *table,
zone_sectors = ti_limits.chunk_sectors;
}
- /* Stack chunk_sectors if target-specific splitting is required */
- if (ti->max_io_len)
- ti_limits.chunk_sectors = lcm_not_zero(ti->max_io_len,
- ti_limits.chunk_sectors);
/* Set I/O hints portion of queue limits */
if (ti->type->io_hints)
ti->type->io_hints(ti, &ti_limits);
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 9ae4ce7df95c..d5223a0e5cc5 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -319,7 +319,7 @@ err1:
#else
static int persistent_memory_claim(struct dm_writecache *wc)
{
- BUG();
+ return -EOPNOTSUPP;
}
#endif
@@ -2041,7 +2041,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
struct wc_memory_superblock s;
static struct dm_arg _args[] = {
- {0, 10, "Invalid number of feature args"},
+ {0, 16, "Invalid number of feature args"},
};
as.argc = argc;
@@ -2479,6 +2479,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
extra_args += 2;
if (wc->autocommit_time_set)
extra_args += 2;
+ if (wc->max_age != MAX_AGE_UNSPECIFIED)
+ extra_args += 2;
if (wc->cleaner)
extra_args++;
if (wc->writeback_fua_set)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6db395c3d28b..5b2f371ec4bb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -476,8 +476,10 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
return -EAGAIN;
map = dm_get_live_table(md, &srcu_idx);
- if (!map)
- return -EIO;
+ if (!map) {
+ ret = -EIO;
+ goto out;
+ }
do {
struct dm_target *tgt;
@@ -507,7 +509,6 @@ out:
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
- __acquires(md->io_barrier)
{
struct dm_target *tgt;
struct dm_table *map;
@@ -541,7 +542,6 @@ retry:
}
static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
- __releases(md->io_barrier)
{
dm_put_live_table(md, srcu_idx);
}
@@ -1040,15 +1040,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
sector_t max_len;
/*
- * Does the target need to split even further?
- * - q->limits.chunk_sectors reflects ti->max_io_len so
- * blk_max_size_offset() provides required splitting.
- * - blk_max_size_offset() also respects q->limits.max_sectors
+ * Does the target need to split IO even further?
+ * - varied (per target) IO splitting is a tenet of DM; this
+ * explains why stacked chunk_sectors based splitting via
+ * blk_max_size_offset() isn't possible here. So pass in
+ * ti->max_io_len to override stacked chunk_sectors.
*/
- max_len = blk_max_size_offset(ti->table->md->queue,
- target_offset);
- if (len > max_len)
- len = max_len;
+ if (ti->max_io_len) {
+ max_len = blk_max_size_offset(ti->table->md->queue,
+ target_offset, ti->max_io_len);
+ if (len > max_len)
+ len = max_len;
+ }
return len;
}
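A worked example of the override, assuming the 5.11-era three-argument helper blk_max_size_offset(q, offset, chunk_sectors) and ignoring the queue's max_sectors clamp (numbers illustrative):

	/*
	 * With ti->max_io_len = 128 and target_offset = 100, the helper
	 * bounds the IO to the remainder of the current 128-sector chunk:
	 *
	 *   max_len = 128 - (100 & (128 - 1)) = 28 sectors
	 *
	 * so a 64-sector bio at that offset is split 28 + 36.
	 */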
@@ -1199,11 +1202,9 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
* ->zero_page_range() is mandatory dax operation. If we are
* here, something is wrong.
*/
- dm_put_live_table(md, srcu_idx);
goto out;
}
ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
-
out:
dm_put_live_table(md, srcu_idx);
@@ -1276,8 +1277,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
break;
case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
- trace_block_bio_remap(clone->bi_disk->queue, clone,
- bio_dev(io->orig_bio), sector);
+ trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector);
ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
@@ -1422,18 +1422,12 @@ static int __send_empty_flush(struct clone_info *ci)
*/
bio_init(&flush_bio, NULL, 0);
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ flush_bio.bi_disk = ci->io->md->disk;
+ bio_associate_blkg(&flush_bio);
+
ci->bio = &flush_bio;
ci->sector_count = 0;
- /*
- * Empty flush uses a statically initialized bio, as the base for
- * cloning. However, blkg association requires that a bdev is
- * associated with a gendisk, which doesn't happen until the bdev is
- * opened. So, blkg association is done at issue time of the flush
- * rather than when the device is created in alloc_dev().
- */
- bio_set_dev(ci->bio, ci->io->md->bdev);
-
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1613,12 +1607,12 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* (by eliminating DM's splitting and just using bio_split)
*/
part_stat_lock();
- __dm_part_stat_sub(&dm_disk(md)->part0,
+ __dm_part_stat_sub(dm_disk(md)->part0,
sectors[op_stat_group(bio_op(bio))], ci.sector_count);
part_stat_unlock();
bio_chain(b, bio);
- trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
+ trace_block_split(b, bio->bi_iter.bi_sector);
ret = submit_bio_noacct(bio);
break;
}
@@ -1750,11 +1744,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
cleanup_srcu_struct(&md->io_barrier);
- if (md->bdev) {
- bdput(md->bdev);
- md->bdev = NULL;
- }
-
mutex_destroy(&md->suspend_lock);
mutex_destroy(&md->type_lock);
mutex_destroy(&md->table_devices_lock);
@@ -1846,10 +1835,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->wq)
goto bad;
- md->bdev = bdget_disk(md->disk, 0);
- if (!md->bdev)
- goto bad;
-
dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */
@@ -1974,8 +1959,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (size != dm_get_size(md))
memset(&md->geometry, 0, sizeof(md->geometry));
- set_capacity(md->disk, size);
- bd_set_nr_sectors(md->bdev, size);
+ set_capacity_and_notify(md->disk, size);
dm_table_event_callback(t, event_callback, md);
@@ -2258,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put);
static bool md_in_flight_bios(struct mapped_device *md)
{
int cpu;
- struct hd_struct *part = &dm_disk(md)->part0;
+ struct block_device *part = dm_disk(md)->part0;
long sum = 0;
for_each_possible_cpu(cpu) {
@@ -2393,27 +2377,19 @@ static int lock_fs(struct mapped_device *md)
{
int r;
- WARN_ON(md->frozen_sb);
-
- md->frozen_sb = freeze_bdev(md->bdev);
- if (IS_ERR(md->frozen_sb)) {
- r = PTR_ERR(md->frozen_sb);
- md->frozen_sb = NULL;
- return r;
- }
+ WARN_ON(test_bit(DMF_FROZEN, &md->flags));
- set_bit(DMF_FROZEN, &md->flags);
-
- return 0;
+ r = freeze_bdev(md->disk->part0);
+ if (!r)
+ set_bit(DMF_FROZEN, &md->flags);
+ return r;
}
static void unlock_fs(struct mapped_device *md)
{
if (!test_bit(DMF_FROZEN, &md->flags))
return;
-
- thaw_bdev(md->bdev, md->frozen_sb);
- md->frozen_sb = NULL;
+ thaw_bdev(md->disk->part0);
clear_bit(DMF_FROZEN, &md->flags);
}
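The deleted frozen_sb field tracks this API rework: freeze_bdev() now returns an errno and the frozen super_block stays cached inside the block_device, so callers keep only a flag. A minimal sketch of the resulting pair, assuming the 5.11-era prototypes int freeze_bdev(struct block_device *) and int thaw_bdev(struct block_device *); the example_* names and EXAMPLE_FROZEN bit are illustrative:

#include <linux/blkdev.h>
#include <linux/bitops.h>

#define EXAMPLE_FROZEN	0	/* stand-in for DMF_FROZEN */

static int example_lock_fs(struct block_device *bdev, unsigned long *flags)
{
	int r = freeze_bdev(bdev);	/* 0 on success, -errno on failure */

	if (!r)
		set_bit(EXAMPLE_FROZEN, flags);
	return r;
}

static void example_unlock_fs(struct block_device *bdev, unsigned long *flags)
{
	if (test_and_clear_bit(EXAMPLE_FROZEN, flags))
		thaw_bdev(bdev);	/* drops the freeze reference taken above */
}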
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index f0e64e76fd79..7fbd41e156c9 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -581,8 +581,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
process_metadata_update(mddev, msg);
break;
case CHANGE_CAPACITY:
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
break;
case RESYNCING:
set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
@@ -1304,13 +1303,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors)
if (ret)
pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n",
__func__, __LINE__);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
} else {
/* revert to previous sectors */
ret = mddev->pers->resize(mddev, old_dev_sectors);
- if (!ret)
- revalidate_disk_size(mddev->gendisk, true);
ret = __sendmsg(cinfo, &cmsg);
if (ret)
pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 5ab22069b5be..68cac7d19278 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -200,9 +200,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
"copied raid_disks doesn't match mddev->raid_disks");
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
- set_capacity(mddev->gendisk, mddev->array_sectors);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev);
- revalidate_disk_size(mddev->gendisk, true);
kfree_rcu(oldconf, rcu);
return 0;
}
@@ -258,8 +257,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio_endio(bio);
} else {
if (mddev->gendisk)
- trace_block_bio_remap(bio->bi_disk->queue,
- bio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, bio);
mddev_check_write_zeroes(mddev, bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c42af46d366a..ca409428b4fc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -464,7 +464,7 @@ struct md_io {
bio_end_io_t *orig_bi_end_io;
void *orig_bi_private;
unsigned long start_time;
- struct hd_struct *part;
+ struct block_device *part;
};
static void md_end_io(struct bio *bio)
@@ -2418,7 +2418,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev);
static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
char b[BDEVNAME_SIZE];
- struct kobject *ko;
int err;
/* prevent duplicates */
@@ -2481,9 +2480,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
- ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
/* failure here is OK */
- err = sysfs_create_link(&rdev->kobj, ko, "block");
+ err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
rdev->sysfs_unack_badblocks =
sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
@@ -5359,10 +5357,9 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
if (!err) {
mddev->array_sectors = sectors;
- if (mddev->pers) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (mddev->pers)
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
mddev_unlock(mddev);
return err ?: len;
@@ -6112,8 +6109,7 @@ int do_md_run(struct mddev *mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
clear_bit(MD_NOT_READY, &mddev->flags);
mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -6428,10 +6424,9 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (rdev->raid_disk >= 0)
sysfs_unlink_rdev(mddev, rdev);
- set_capacity(disk, 0);
+ set_capacity_and_notify(disk, 0);
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- revalidate_disk_size(disk, true);
if (mddev->ro)
mddev->ro = 0;
@@ -7264,8 +7259,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
else if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
}
return rv;
@@ -8456,7 +8451,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
- curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
+ curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
@@ -8593,26 +8588,6 @@ void md_write_end(struct mddev *mddev)
EXPORT_SYMBOL(md_write_end);
-/* This is used by raid0 and raid10 */
-void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
- struct bio *bio, sector_t start, sector_t size)
-{
- struct bio *discard_bio = NULL;
-
- if (__blkdev_issue_discard(rdev->bdev, start, size,
- GFP_NOIO, 0, &discard_bio) || !discard_bio)
- return;
-
- bio_chain(discard_bio, bio);
- bio_clone_blkg_association(discard_bio, bio);
- if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(rdev->bdev),
- discard_bio, disk_devt(mddev->gendisk),
- bio->bi_iter.bi_sector);
- submit_bio_noacct(discard_bio);
-}
-EXPORT_SYMBOL(md_submit_discard_bio);
-
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
@@ -9046,10 +9021,9 @@ void md_do_sync(struct md_thread *thread)
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
- if (!mddev_is_clustered(mddev)) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (!mddev_is_clustered(mddev))
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
spin_lock(&mddev->lock);
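Every md.c resize site above makes the same substitution; side by side (illustration only, not code from the patch):

	/* before: update the gendisk, then propagate the size to the bdev */
	set_capacity(disk, sectors);
	revalidate_disk_size(disk, true);

	/* after: one helper that also emits a RESIZE=1 change uevent */
	set_capacity_and_notify(disk, sectors);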
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2292c847f9dd..34070ab30a8a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -713,8 +713,6 @@ extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
-extern void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
- struct bio *bio, sector_t start, sector_t size);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 6f44177593a5..67f157f2525d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -477,6 +477,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
for (disk = 0; disk < zone->nb_dev; disk++) {
sector_t dev_start, dev_end;
+ struct bio *discard_bio = NULL;
struct md_rdev *rdev;
if (disk < start_disk_index)
@@ -499,9 +500,18 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
rdev = conf->devlist[(zone - conf->strip_zone) *
conf->strip_zone[0].nb_dev + disk];
- md_submit_discard_bio(mddev, rdev, bio,
+ if (__blkdev_issue_discard(rdev->bdev,
dev_start + zone->dev_start + rdev->data_offset,
- dev_end - dev_start);
+ dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+ !discard_bio)
+ continue;
+ bio_chain(discard_bio, bio);
+ bio_clone_blkg_association(discard_bio, bio);
+ if (mddev->gendisk)
+ trace_block_bio_remap(discard_bio,
+ disk_devt(mddev->gendisk),
+ bio->bi_iter.bi_sector);
+ submit_bio_noacct(discard_bio);
}
bio_endio(bio);
}
@@ -571,8 +581,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
tmp_dev->data_offset;
if (mddev->gendisk)
- trace_block_bio_remap(bio->bi_disk->queue, bio,
- disk_devt(mddev->gendisk), bio_sector);
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
+ bio_sector);
mddev_check_writesame(mddev, bio);
mddev_check_write_zeroes(mddev, bio);
submit_bio_noacct(bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 960d854c07f8..c0347997f6ff 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1305,8 +1305,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r1_bio;
if (mddev->gendisk)
- trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
- disk_devt(mddev->gendisk), r1_bio->sector);
+ trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
+ r1_bio->sector);
submit_bio_noacct(read_bio);
}
@@ -1517,8 +1517,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&r1_bio->remaining);
if (mddev->gendisk)
- trace_block_bio_remap(mbio->bi_disk->queue,
- mbio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
r1_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_disk = (void *)conf->mirrors[i].rdev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3153183b7772..c5d88ef6a45c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -91,7 +91,7 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio)
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
struct r10conf *conf = data;
- int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);
+ int size = offsetof(struct r10bio, devs[conf->copies]);
/* allocate a r10bio with room for raid_disks entries in the
* bios array */
@@ -238,7 +238,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
{
int i;
- for (i = 0; i < conf->geo.raid_disks; i++) {
+ for (i = 0; i < conf->copies; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
@@ -327,7 +327,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
int slot;
int repl = 0;
- for (slot = 0; slot < conf->geo.raid_disks; slot++) {
+ for (slot = 0; slot < conf->copies; slot++) {
if (r10_bio->devs[slot].bio == bio)
break;
if (r10_bio->devs[slot].repl_bio == bio) {
@@ -336,6 +336,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
}
}
+ BUG_ON(slot == conf->copies);
update_head_pos(slot, r10_bio);
if (slotp)
@@ -1200,8 +1201,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r10_bio;
if (mddev->gendisk)
- trace_block_bio_remap(read_bio->bi_disk->queue,
- read_bio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
r10_bio->sector);
submit_bio_noacct(read_bio);
return;
@@ -1250,8 +1250,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_private = r10_bio;
if (conf->mddev->gendisk)
- trace_block_bio_remap(mbio->bi_disk->queue,
- mbio, disk_devt(conf->mddev->gendisk),
+ trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
r10_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_disk = (void *)rdev;
@@ -1275,75 +1274,12 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
}
}
-static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
-{
- int i;
- struct r10conf *conf = mddev->private;
- struct md_rdev *blocked_rdev;
-
-retry_wait:
- blocked_rdev = NULL;
- rcu_read_lock();
- for (i = 0; i < conf->copies; i++) {
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[i].replacement);
- if (rdev == rrdev)
- rrdev = NULL;
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- atomic_inc(&rdev->nr_pending);
- blocked_rdev = rdev;
- break;
- }
- if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
- atomic_inc(&rrdev->nr_pending);
- blocked_rdev = rrdev;
- break;
- }
-
- if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
- sector_t first_bad;
- sector_t dev_sector = r10_bio->devs[i].addr;
- int bad_sectors;
- int is_bad;
-
- /* Discard request doesn't care the write result
- * so it doesn't need to wait blocked disk here.
- */
- if (!r10_bio->sectors)
- continue;
-
- is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
- &first_bad, &bad_sectors);
- if (is_bad < 0) {
- /* Mustn't write here until the bad block
- * is acknowledged
- */
- atomic_inc(&rdev->nr_pending);
- set_bit(BlockedBadBlocks, &rdev->flags);
- blocked_rdev = rdev;
- break;
- }
- }
- }
- rcu_read_unlock();
-
- if (unlikely(blocked_rdev)) {
- /* Have to wait for this device to get unblocked, then retry */
- allow_barrier(conf);
- raid10_log(conf->mddev, "%s wait rdev %d blocked",
- __func__, blocked_rdev->raid_disk);
- md_wait_for_blocked_rdev(blocked_rdev, mddev);
- wait_barrier(conf);
- goto retry_wait;
- }
-}
-
static void raid10_write_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
int i;
+ struct md_rdev *blocked_rdev;
sector_t sectors;
int max_sectors;
@@ -1401,9 +1337,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
-
- wait_blocked_dev(mddev, r10_bio);
-
+retry_write:
+ blocked_rdev = NULL;
rcu_read_lock();
max_sectors = r10_bio->sectors;
@@ -1414,6 +1349,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
conf->mirrors[d].replacement);
if (rdev == rrdev)
rrdev = NULL;
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+ atomic_inc(&rrdev->nr_pending);
+ blocked_rdev = rrdev;
+ break;
+ }
if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1434,6 +1379,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
+ if (is_bad < 0) {
+ /* Mustn't write here until the bad block
+ * is acknowledged
+ */
+ atomic_inc(&rdev->nr_pending);
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ blocked_rdev = rdev;
+ break;
+ }
if (is_bad && first_bad <= dev_sector) {
/* Cannot write here at all */
bad_sectors -= (dev_sector - first_bad);
@@ -1469,6 +1423,35 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Have to wait for this device to get unblocked, then retry */
+ int j;
+ int d;
+
+ for (j = 0; j < i; j++) {
+ if (r10_bio->devs[j].bio) {
+ d = r10_bio->devs[j].devnum;
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ }
+ if (r10_bio->devs[j].repl_bio) {
+ struct md_rdev *rdev;
+ d = r10_bio->devs[j].devnum;
+ rdev = conf->mirrors[d].replacement;
+ if (!rdev) {
+ /* Race with remove_disk */
+ smp_mb();
+ rdev = conf->mirrors[d].rdev;
+ }
+ rdev_dec_pending(rdev, mddev);
+ }
+ }
+ allow_barrier(conf);
+ raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
if (max_sectors < r10_bio->sectors)
r10_bio->sectors = max_sectors;
@@ -1509,7 +1492,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
r10_bio->read_slot = -1;
- memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks);
+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
if (bio_data_dir(bio) == READ)
raid10_read_request(mddev, bio, r10_bio);
@@ -1517,296 +1500,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
raid10_write_request(mddev, bio, r10_bio);
}
-static struct bio *raid10_split_bio(struct r10conf *conf,
- struct bio *bio, sector_t sectors, bool want_first)
-{
- struct bio *split;
-
- split = bio_split(bio, sectors, GFP_NOIO, &conf->bio_split);
- bio_chain(split, bio);
- allow_barrier(conf);
- if (want_first) {
- submit_bio_noacct(bio);
- bio = split;
- } else
- submit_bio_noacct(split);
- wait_barrier(conf);
-
- return bio;
-}
-
-static void raid_end_discard_bio(struct r10bio *r10bio)
-{
- struct r10conf *conf = r10bio->mddev->private;
- struct r10bio *first_r10bio;
-
- while (atomic_dec_and_test(&r10bio->remaining)) {
-
- allow_barrier(conf);
-
- if (!test_bit(R10BIO_Discard, &r10bio->state)) {
- first_r10bio = (struct r10bio *)r10bio->master_bio;
- free_r10bio(r10bio);
- r10bio = first_r10bio;
- } else {
- md_write_end(r10bio->mddev);
- bio_endio(r10bio->master_bio);
- free_r10bio(r10bio);
- break;
- }
- }
-}
-
-static void raid10_end_discard_request(struct bio *bio)
-{
- struct r10bio *r10_bio = bio->bi_private;
- struct r10conf *conf = r10_bio->mddev->private;
- struct md_rdev *rdev = NULL;
- int dev;
- int slot, repl;
-
- /*
- * We don't care the return value of discard bio
- */
- if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- set_bit(R10BIO_Uptodate, &r10_bio->state);
-
- dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
- if (repl)
- rdev = conf->mirrors[dev].replacement;
- if (!rdev) {
- /* raid10_remove_disk uses smp_mb to make sure rdev is set to
- * replacement before setting replacement to NULL. It can read
- * rdev first without barrier protect even replacment is NULL
- */
- smp_rmb();
- rdev = conf->mirrors[dev].rdev;
- }
-
- raid_end_discard_bio(r10_bio);
- rdev_dec_pending(rdev, conf->mddev);
-}
-
-/* There are some limitations to handle discard bio
- * 1st, the discard size is bigger than stripe_size*2.
- * 2st, if the discard bio spans reshape progress, we use the old way to
- * handle discard bio
- */
-static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
-{
- struct r10conf *conf = mddev->private;
- struct geom *geo = &conf->geo;
- struct r10bio *r10_bio, *first_r10bio;
- int far_copies = geo->far_copies;
- bool first_copy = true;
-
- int disk;
- sector_t chunk;
- unsigned int stripe_size;
- sector_t split_size;
-
- sector_t bio_start, bio_end;
- sector_t first_stripe_index, last_stripe_index;
- sector_t start_disk_offset;
- unsigned int start_disk_index;
- sector_t end_disk_offset;
- unsigned int end_disk_index;
- unsigned int remainder;
-
- if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- return -EAGAIN;
-
- wait_barrier(conf);
-
- /* Check reshape again to avoid reshape happens after checking
- * MD_RECOVERY_RESHAPE and before wait_barrier
- */
- if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- goto out;
-
- stripe_size = geo->raid_disks << geo->chunk_shift;
- bio_start = bio->bi_iter.bi_sector;
- bio_end = bio_end_sector(bio);
-
- /* Maybe one discard bio is smaller than strip size or across one stripe
- * and discard region is larger than one stripe size. For far offset layout,
- * if the discard region is not aligned with stripe size, there is hole
- * when we submit discard bio to member disk. For simplicity, we only
- * handle discard bio which discard region is bigger than stripe_size*2
- */
- if (bio_sectors(bio) < stripe_size*2)
- goto out;
-
- /* For far and far offset layout, if bio is not aligned with stripe size,
- * it splits the part that is not aligned with strip size.
- */
- div_u64_rem(bio_start, stripe_size, &remainder);
- if ((far_copies > 1) && remainder) {
- split_size = stripe_size - remainder;
- bio = raid10_split_bio(conf, bio, split_size, false);
- }
- div_u64_rem(bio_end, stripe_size, &remainder);
- if ((far_copies > 1) && remainder) {
- split_size = bio_sectors(bio) - remainder;
- bio = raid10_split_bio(conf, bio, split_size, true);
- }
-
- bio_start = bio->bi_iter.bi_sector;
- bio_end = bio_end_sector(bio);
-
- /* raid10 uses chunk as the unit to store data. It's similar like raid0.
- * One stripe contains the chunks from all member disk (one chunk from
- * one disk at the same HBA address). For layout detail, see 'man md 4'
- */
- chunk = bio_start >> geo->chunk_shift;
- chunk *= geo->near_copies;
- first_stripe_index = chunk;
- start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
- if (geo->far_offset)
- first_stripe_index *= geo->far_copies;
- start_disk_offset = (bio_start & geo->chunk_mask) +
- (first_stripe_index << geo->chunk_shift);
-
- chunk = bio_end >> geo->chunk_shift;
- chunk *= geo->near_copies;
- last_stripe_index = chunk;
- end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
- if (geo->far_offset)
- last_stripe_index *= geo->far_copies;
- end_disk_offset = (bio_end & geo->chunk_mask) +
- (last_stripe_index << geo->chunk_shift);
-
-retry_discard:
- r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
- r10_bio->mddev = mddev;
- r10_bio->state = 0;
- r10_bio->sectors = 0;
- memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
- wait_blocked_dev(mddev, r10_bio);
-
- /* For far layout it needs more than one r10bio to cover all regions.
- * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
- * to record the discard bio. Other r10bio->master_bio record the first
- * r10bio. The first r10bio only release after all other r10bios finish.
- * The discard bio returns only first r10bio finishes
- */
- if (first_copy) {
- r10_bio->master_bio = bio;
- set_bit(R10BIO_Discard, &r10_bio->state);
- first_copy = false;
- first_r10bio = r10_bio;
- } else
- r10_bio->master_bio = (struct bio *)first_r10bio;
-
- rcu_read_lock();
- for (disk = 0; disk < geo->raid_disks; disk++) {
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
-
- r10_bio->devs[disk].bio = NULL;
- r10_bio->devs[disk].repl_bio = NULL;
-
- if (rdev && (test_bit(Faulty, &rdev->flags)))
- rdev = NULL;
- if (rrdev && (test_bit(Faulty, &rrdev->flags)))
- rrdev = NULL;
- if (!rdev && !rrdev)
- continue;
-
- if (rdev) {
- r10_bio->devs[disk].bio = bio;
- atomic_inc(&rdev->nr_pending);
- }
- if (rrdev) {
- r10_bio->devs[disk].repl_bio = bio;
- atomic_inc(&rrdev->nr_pending);
- }
- }
- rcu_read_unlock();
-
- atomic_set(&r10_bio->remaining, 1);
- for (disk = 0; disk < geo->raid_disks; disk++) {
- sector_t dev_start, dev_end;
- struct bio *mbio, *rbio = NULL;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
-
- /*
- * Now start to calculate the start and end address for each disk.
- * The space between dev_start and dev_end is the discard region.
- *
- * For dev_start, it needs to consider three conditions:
- * 1st, the disk is before start_disk, you can imagine the disk in
- * the next stripe. So the dev_start is the start address of next
- * stripe.
- * 2st, the disk is after start_disk, it means the disk is at the
- * same stripe of first disk
- * 3st, the first disk itself, we can use start_disk_offset directly
- */
- if (disk < start_disk_index)
- dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
- else if (disk > start_disk_index)
- dev_start = first_stripe_index * mddev->chunk_sectors;
- else
- dev_start = start_disk_offset;
-
- if (disk < end_disk_index)
- dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
- else if (disk > end_disk_index)
- dev_end = last_stripe_index * mddev->chunk_sectors;
- else
- dev_end = end_disk_offset;
-
- /* It only handles discard bio which size is >= stripe size, so
- * dev_end > dev_start all the time
- */
- if (r10_bio->devs[disk].bio) {
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
- mbio->bi_end_io = raid10_end_discard_request;
- mbio->bi_private = r10_bio;
- r10_bio->devs[disk].bio = mbio;
- r10_bio->devs[disk].devnum = disk;
- atomic_inc(&r10_bio->remaining);
- md_submit_discard_bio(mddev, rdev, mbio,
- dev_start + choose_data_offset(r10_bio, rdev),
- dev_end - dev_start);
- bio_endio(mbio);
- }
- if (r10_bio->devs[disk].repl_bio) {
- rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
- rbio->bi_end_io = raid10_end_discard_request;
- rbio->bi_private = r10_bio;
- r10_bio->devs[disk].repl_bio = rbio;
- r10_bio->devs[disk].devnum = disk;
- atomic_inc(&r10_bio->remaining);
- md_submit_discard_bio(mddev, rrdev, rbio,
- dev_start + choose_data_offset(r10_bio, rrdev),
- dev_end - dev_start);
- bio_endio(rbio);
- }
- }
-
- if (!geo->far_offset && --far_copies) {
- first_stripe_index += geo->stride >> geo->chunk_shift;
- start_disk_offset += geo->stride;
- last_stripe_index += geo->stride >> geo->chunk_shift;
- end_disk_offset += geo->stride;
- atomic_inc(&first_r10bio->remaining);
- raid_end_discard_bio(r10_bio);
- wait_barrier(conf);
- goto retry_discard;
- }
-
- raid_end_discard_bio(r10_bio);
-
- return 0;
-out:
- allow_barrier(conf);
- return -EAGAIN;
-}
-
static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
@@ -1821,10 +1514,6 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
if (!md_write_start(mddev, bio))
return false;
- if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
- if (!raid10_handle_discard(mddev, bio))
- return true;
-
/*
* If this request crosses a chunk boundary, we need to split
* it.
@@ -4064,7 +3753,7 @@ static int raid10_run(struct mddev *mddev)
if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue,
- UINT_MAX);
+ mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, 0);
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 1461fd55311b..79cd2b7d3128 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -179,6 +179,5 @@ enum r10bio_state {
R10BIO_Previous,
/* failfast devices did receive failfast requests. */
R10BIO_FailFast,
- R10BIO_Discard,
};
#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 39343479ac2a..3a90cc0e43ca 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1222,9 +1222,9 @@ again:
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
if (conf->mddev->gendisk)
- trace_block_bio_remap(bi->bi_disk->queue,
- bi, disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ trace_block_bio_remap(bi,
+ disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, bi);
else
@@ -1272,9 +1272,9 @@ again:
if (op == REQ_OP_DISCARD)
rbi->bi_vcnt = 0;
if (conf->mddev->gendisk)
- trace_block_bio_remap(rbi->bi_disk->queue,
- rbi, disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ trace_block_bio_remap(rbi,
+ disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, rbi);
else
@@ -5468,8 +5468,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
spin_unlock_irq(&conf->device_lock);
if (mddev->gendisk)
- trace_block_bio_remap(align_bi->bi_disk->queue,
- align_bi, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk),
raid_bio->bi_iter.bi_sector);
submit_bio_noacct(align_bi);
return 1;