summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2018-03-14 20:37:31 +0100
committerThomas Gleixner <tglx@linutronix.de>2018-03-14 20:37:31 +0100
commitb0d8bef8ed805ca92eb91e86acf3ce89cbebc8ce (patch)
tree8838391a76f0cf75ffcd31166996d03d19c580a6 /drivers/md
parentsoftirq: Consolidate common code in tasklet_[hi]_action() (diff)
parentMerge tag 'nfs-for-4.16-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs (diff)
downloadlinux-b0d8bef8ed805ca92eb91e86acf3ce89cbebc8ce.tar.xz
linux-b0d8bef8ed805ca92eb91e86acf3ce89cbebc8ce.zip
Merge branch 'linus' into irq/core to pick up dependencies.
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/request.c2
-rw-r--r--drivers/md/bcache/super.c29
-rw-r--r--drivers/md/dm-bufio.c16
-rw-r--r--drivers/md/dm-mpath.c66
-rw-r--r--drivers/md/dm-raid.c7
-rw-r--r--drivers/md/dm-table.c16
-rw-r--r--drivers/md/dm.c38
-rw-r--r--drivers/md/md-multipath.c2
-rw-r--r--drivers/md/md.c53
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid1.h12
-rw-r--r--drivers/md/raid10.c18
-rw-r--r--drivers/md/raid10.h13
-rw-r--r--drivers/md/raid5-log.h3
-rw-r--r--drivers/md/raid5-ppl.c10
-rw-r--r--drivers/md/raid5.c19
-rw-r--r--drivers/md/raid5.h12
18 files changed, 215 insertions, 116 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 1a46b41dac70..6422846b546e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -659,11 +659,11 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
static void search_free(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
- bio_complete(s);
if (s->iop.bio)
bio_put(s->iop.bio);
+ bio_complete(s);
closure_debug_destroy(cl);
mempool_free(s, s->d->c->search);
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 312895788036..f2273143b3cb 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -963,6 +963,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
uint32_t rtime = cpu_to_le32(get_seconds());
struct uuid_entry *u;
char buf[BDEVNAME_SIZE];
+ struct cached_dev *exist_dc, *t;
bdevname(dc->bdev, buf);
@@ -987,6 +988,16 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -EINVAL;
}
+ /* Check whether already attached */
+ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
+ if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
+ pr_err("Tried to attach %s but duplicate UUID already attached",
+ buf);
+
+ return -EINVAL;
+ }
+ }
+
u = uuid_find(c, dc->sb.uuid);
if (u &&
@@ -1204,7 +1215,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
return;
err:
- pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+ pr_notice("error %s: %s", bdevname(bdev, name), err);
bcache_device_stop(&dc->disk);
}
@@ -1274,7 +1285,7 @@ static int flash_devs_run(struct cache_set *c)
struct uuid_entry *u;
for (u = c->uuids;
- u < c->uuids + c->devices_max_used && !ret;
+ u < c->uuids + c->nr_uuids && !ret;
u++)
if (UUID_FLASH_ONLY(u))
ret = flash_dev_run(c, u);
@@ -1883,6 +1894,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
const char *err = NULL; /* must be set for any error case */
int ret = 0;
+ bdevname(bdev, name);
+
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
@@ -1891,11 +1904,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
get_page(sb_page);
- if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+ if (blk_queue_discard(bdev_get_queue(bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
ret = cache_alloc(ca);
if (ret != 0) {
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM";
else
@@ -1918,14 +1932,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
goto out;
}
- pr_info("registered cache device %s", bdevname(bdev, name));
+ pr_info("registered cache device %s", name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+ pr_notice("error %s: %s", name, err);
return ret;
}
@@ -2014,6 +2028,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (err)
goto err_close;
+ err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
@@ -2028,7 +2043,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto err_close;
if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err_close;
+ goto err;
}
out:
if (sb_page)
@@ -2041,7 +2056,7 @@ out:
err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
- pr_info("error opening %s: %s", path, err);
+ pr_info("error %s: %s", path, err);
ret = -EINVAL;
goto out;
}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 414c9af54ded..aa2032fa80d4 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
{
- unsigned noio_flag;
- void *ptr;
-
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* all allocations done by this process (including pagetables) are done
* as if GFP_NOIO was specified.
*/
+ if (gfp_mask & __GFP_NORETRY) {
+ unsigned noio_flag = memalloc_noio_save();
+ void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
- if (gfp_mask & __GFP_NORETRY)
- noio_flag = memalloc_noio_save();
-
- ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
- if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
+ return ptr;
+ }
- return ptr;
+ return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
}
/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7d3e572072f5..3fde9e9faddd 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -22,6 +22,7 @@
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
+#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
@@ -211,25 +212,13 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
-
- if (m->queue_mode == DM_TYPE_BIO_BASED) {
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
- }
- }
-
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
- set_bit(MPATHF_QUEUE_IO, &m->flags);
- atomic_set(&m->pg_init_in_progress, 0);
- atomic_set(&m->pg_init_count, 0);
- m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
- init_waitqueue_head(&m->pg_init_wait);
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
dm_table_set_type(ti->table, m->queue_mode);
@@ -337,14 +326,12 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- return;
-
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
set_bit(MPATHF_QUEUE_IO, &m->flags);
} else {
+ /* FIXME: not needed if no scsi_dh is attached */
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
clear_bit(MPATHF_QUEUE_IO, &m->flags);
}
@@ -385,8 +372,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
- clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
@@ -599,7 +585,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
return pgpath;
}
-static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
unsigned long flags;
@@ -634,8 +620,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
{
struct pgpath *pgpath;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- pgpath = __map_bio_nvme(m, bio);
+ if (!m->hw_handler_name)
+ pgpath = __map_bio_fast(m, bio);
else
pgpath = __map_bio(m, bio);
@@ -675,8 +661,7 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
- else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -838,6 +823,16 @@ retain:
*/
kfree(m->hw_handler_name);
m->hw_handler_name = attached_handler_name;
+
+ /*
+ * Init fields that are only used when a scsi_dh is attached
+ */
+ if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) {
+ atomic_set(&m->pg_init_in_progress, 0);
+ atomic_set(&m->pg_init_count, 0);
+ m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+ init_waitqueue_head(&m->pg_init_wait);
+ }
}
}
@@ -873,6 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
int r;
struct pgpath *p;
struct multipath *m = ti->private;
+ struct scsi_device *sdev;
/* we need at least a path arg */
if (as->argc < 1) {
@@ -891,7 +887,9 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev));
+ if (sdev) {
+ put_device(&sdev->sdev_gendev);
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
if (r) {
@@ -1001,8 +999,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
- if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
@@ -1091,8 +1088,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
- else if (!strcasecmp(queue_mode_name, "nvme"))
- m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1188,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1725,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
- case DM_TYPE_NVME_BIO_BASED:
- DMEMIT("queue_mode nvme ");
- break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7ef469e902c6..c1d1034ff7b7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
- test_bit(MD_RECOVERY_RUNNING, &recovery))
+ if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+ (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
+ test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+ test_bit(MD_RECOVERY_RUNNING, &recovery)))
r = mddev->curr_resync_completed;
else
r = mddev->recovery_cp;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5fe7ec356c33..7eb3e2a3c07d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
- if (t->type == DM_TYPE_BIO_BASED)
- return 0;
- else if (t->type == DM_TYPE_NVME_BIO_BASED) {
- if (!dm_table_does_not_support_partial_completion(t)) {
- DMERR("nvme bio-based is only possible with devices"
- " that don't support partial completion");
- return -EINVAL;
- }
- /* Fallthru, also verify all devices are blk-mq */
+ if (t->type == DM_TYPE_BIO_BASED) {
+ /* possibly upgrade to a variant of bio-based */
+ goto verify_bio_based;
}
BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+ BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
goto verify_rq_based;
}
@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
}
if (bio_based) {
+verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
char b[BDEVNAME_SIZE];
/* For now, NVMe devices are the only devices of this class */
- return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
+ return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
}
static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d6de00f367ef..45328d8b2859 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
- struct block_device **bdev,
- fmode_t *mode)
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
+static int dm_get_bdev_for_ioctl(struct mapped_device *md,
+ struct block_device **bdev,
+ fmode_t *mode)
{
struct dm_target *tgt;
struct dm_table *map;
@@ -490,6 +492,10 @@ retry:
goto out;
bdgrab(*bdev);
+ r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
+ if (r < 0)
+ goto out;
+
dm_put_live_table(md, srcu_idx);
return r;
@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
struct mapped_device *md = bdev->bd_disk->private_data;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md)
{
- static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
int r;
BUG_ON(td->dm_dev.bdev);
- bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+ bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
@@ -903,7 +908,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_status = io_error;
+ if (io_error)
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -3010,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3020,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3031,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3041,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3053,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3063,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3074,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3084,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index e40065bdbfc8..0a7e99d62c69 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
}
rcu_read_unlock();
- seq_printf (seq, "]");
+ seq_putc(seq, ']');
}
static int multipath_congested(struct mddev *mddev, int bits)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index bc67ab6844f0..254e44e44668 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio;
int ff = 0;
+ if (!page)
+ return;
+
if (test_bit(Faulty, &rdev->flags))
return;
@@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
* the only valid external interface is through the md
* device.
*/
+ mddev->has_superblocks = false;
rdev_for_each(rdev, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
@@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
set_disk_ro(mddev->gendisk, 1);
}
+ if (rdev->sb_page)
+ mddev->has_superblocks = true;
+
/* perform some consistency tests on the device.
* We don't want the data to overlap the metadata,
* Internal Bitmap issues have been handled elsewhere.
@@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
}
if (mddev->sync_set == NULL) {
mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->sync_set)
- return -ENOMEM;
+ if (!mddev->sync_set) {
+ err = -ENOMEM;
+ goto abort;
+ }
}
spin_lock(&pers_lock);
@@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
else
pr_warn("md: personality for level %s is not loaded!\n",
mddev->clevel);
- return -EINVAL;
+ err = -EINVAL;
+ goto abort;
}
spin_unlock(&pers_lock);
if (mddev->level != pers->level) {
@@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
module_put(pers->owner);
- return -EINVAL;
+ err = -EINVAL;
+ goto abort;
}
if (pers->sync_request) {
@@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
- return err;
+ goto abort;
}
if (mddev->queue) {
bool nonrot = true;
@@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
return 0;
+
+abort:
+ if (mddev->bio_set) {
+ bioset_free(mddev->bio_set);
+ mddev->bio_set = NULL;
+ }
+ if (mddev->sync_set) {
+ bioset_free(mddev->sync_set);
+ mddev->sync_set = NULL;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
bool md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
+
if (bio_data_dir(bi) != WRITE)
return true;
@@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
rcu_read_unlock();
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
+ if (!mddev->has_superblocks)
+ return true;
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
mddev->suspended);
@@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+ mddev->delta_disks > 0 &&
+ mddev->pers->finish_reshape &&
+ mddev->pers->size &&
+ mddev->queue) {
+ mddev_lock_nointr(mddev);
+ md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
+ mddev_unlock(mddev);
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
+
spin_lock(&mddev->lock);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* We completed so min/max setting can be forgotten if used. */
@@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
int removed = 0;
bool remove_some = false;
+ if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ /* Mustn't remove devices when resync thread is running */
+ return 0;
+
rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
rdev->raid_disk >= 0 &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 58cd20a5e85e..fbc925cce810 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -468,6 +468,8 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
+
+ bool has_superblocks:1;
};
enum recovery_flags {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b2eae332e1a2..fe872dc6712e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1108,7 +1108,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
bio_copy_data(behind_bio, bio);
skip_copy:
- r1_bio->behind_master_bio = behind_bio;;
+ r1_bio->behind_master_bio = behind_bio;
set_bit(R1BIO_BehindIO, &r1_bio->state);
return;
@@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
freeze_array(conf, 0);
+ if (atomic_read(&repl->nr_pending)) {
+ /* It means that some queued IO of retry_list
+ * hold repl. Thus, we cannot set replacement
+ * as NULL, avoiding rdev NULL pointer
+ * dereference in sync_request_write and
+ * handle_write_finished.
+ */
+ err = -EBUSY;
+ unfreeze_array(conf);
+ goto abort;
+ }
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c7294e7557e0..eb84bc68e2fd 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -26,6 +26,18 @@
#define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR (1<<BARRIER_BUCKETS_NR_BITS)
+/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
+ * There are three safe ways to access raid1_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery is known to be happening - i.e. in code that is
+ * called as part of performing resync/recovery.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the
+ * RCU lock.
+ * When .rdev is set to NULL, the nr_pending count checked again and if it has
+ * been incremented, the pointer is put back in .rdev.
+ */
+
struct raid1_info {
struct md_rdev *rdev;
sector_t head_position;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 99c9207899a7..c5e6c60fc0d4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
#define RESYNC_WINDOW (1024*1024)
/* maximum number of concurrent requests, memory permitting */
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
-#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
/*
@@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
for (m = 0; m < conf->copies; m++) {
int dev = r10_bio->devs[m].devnum;
rdev = conf->mirrors[dev].rdev;
- if (r10_bio->devs[m].bio == NULL)
+ if (r10_bio->devs[m].bio == NULL ||
+ r10_bio->devs[m].bio->bi_end_io == NULL)
continue;
if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
@@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
md_error(conf->mddev, rdev);
}
rdev = conf->mirrors[dev].replacement;
- if (r10_bio->devs[m].repl_bio == NULL)
+ if (r10_bio->devs[m].repl_bio == NULL ||
+ r10_bio->devs[m].repl_bio->bi_end_io == NULL)
continue;
if (!r10_bio->devs[m].repl_bio->bi_status) {
@@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
if (fc > 1 || fo > 0) {
pr_err("only near layout is supported by clustered"
" raid10\n");
- goto out;
+ goto out_free_conf;
}
}
@@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
return;
if (mddev->delta_disks > 0) {
- sector_t size = raid10_size(mddev, 0, 0);
- md_set_array_sectors(mddev, size);
if (mddev->recovery_cp > mddev->resync_max_sectors) {
mddev->recovery_cp = mddev->resync_max_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
- mddev->resync_max_sectors = size;
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
+ mddev->resync_max_sectors = mddev->array_sectors;
} else {
int d;
rcu_read_lock();
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index db2ac22ac1b4..e2e8840de9bf 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -2,6 +2,19 @@
#ifndef _RAID10_H
#define _RAID10_H
+/* Note: raid10_info.rdev can be set to NULL asynchronously by
+ * raid10_remove_disk.
+ * There are three safe ways to access raid10_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
+ * that is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the
+ * RCU lock.
+ * When .rdev is set to NULL, the nr_pending count checked again and if it has
+ * been incremented, the pointer is put back in .rdev.
+ */
+
struct raid10_info {
struct md_rdev *rdev, *replacement;
sector_t head_position;
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 0c76bcedfc1c..a001808a2b77 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
extern void ppl_stripe_write_finished(struct stripe_head *sh);
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
+extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
static inline bool raid5_has_ppl(struct r5conf *conf)
{
@@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
if (conf->log)
ret = r5l_handle_flush_request(conf->log, bio);
else if (raid5_has_ppl(conf))
- ret = 0;
+ ret = ppl_handle_flush_request(conf->log, bio);
return ret;
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 2764c2290062..42890a08375b 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
}
}
+int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
+{
+ if (bio->bi_iter.bi_size == 0) {
+ bio_endio(bio);
+ return 0;
+ }
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ return -EAGAIN;
+}
+
void ppl_stripe_write_finished(struct stripe_head *sh)
{
struct ppl_io_unit *io;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 50d01144b805..b5d2601483e3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
static int grow_stripes(struct r5conf *conf, int num)
{
struct kmem_cache *sc;
+ size_t namelen = sizeof(conf->cache_name[0]);
int devs = max(conf->raid_disks, conf->previous_raid_disks);
if (conf->mddev->gendisk)
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%s", conf->level, mdname(conf->mddev));
else
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%p", conf->level, conf->mddev);
- sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+ snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
@@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)
log_exit(conf);
- if (conf->shrinker.nr_deferred)
- unregister_shrinker(&conf->shrinker);
-
+ unregister_shrinker(&conf->shrinker);
free_thread_groups(conf);
shrink_stripes(conf);
raid5_free_percpu(conf);
@@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
- if (mddev->delta_disks > 0) {
- md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
- } else {
+ if (mddev->delta_disks <= 0) {
int d;
spin_lock_irq(&conf->device_lock);
mddev->degraded = raid5_calc_degraded(conf);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2e6123825095..3f8da26032ac 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -450,6 +450,18 @@ enum {
* HANDLE gets cleared if stripe_handle leaves nothing locked.
*/
+/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
+ * There are three safe ways to access disk_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
+ * is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ * and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
+ * lock.
+ * When .rdev is set to NULL, the nr_pending count checked again and if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
struct disk_info {
struct md_rdev *rdev, *replacement;
struct page *extra_page; /* extra page to use in prexor */