Merge tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer: - various fixes and improvements to request-based DM and DM multipath - some locking improvements in DM bufio - add Kconfig option to disable the DM block manager's extra locking which mainly serves as a developer tool - a few bug fixes to DM's persistent-data - a couple changes to prepare for multipage biovec support in the block layer - various improvements and cleanups in the DM core, DM cache, DM raid and DM crypt - add ability to have DM crypt use keys from the kernel key retention service - add a new "error_writes" feature to the DM flakey target, reads are left unchanged in this mode * tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (40 commits) dm flakey: introduce "error_writes" feature dm cache policy smq: use hash_32() instead of hash_32_generic() dm crypt: reject key strings containing whitespace chars dm space map: always set ev if sm_ll_mutate() succeeds dm space map metadata: skip useless memcpy in metadata_ll_init_index() dm space map metadata: fix 'struct sm_metadata' leak on failed create Documentation: dm raid: define data_offset status field dm raid: fix discard support regression dm raid: don't allow "write behind" with raid4/5/6 dm mpath: use hw_handler_params if attached hw_handler is same as requested dm crypt: add ability to use keys from the kernel key retention service dm array: remove a dead assignment in populate_ablock_with_values() dm ioctl: use offsetof() instead of open-coding it dm rq: simplify use_blk_mq initialization dm: use blk_set_queue_dying() in __dm_destroy() dm bufio: drop the lock when doing GFP_NOIO allocation dm bufio: don't take the lock in dm_bufio_shrink_count dm bufio: avoid sleeping while holding the dm_bufio lock dm table: simplify dm_table_determine_type() dm table: an 'all_blk_mq' table must be loaded for a blk-mq DM device ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 20:01:00 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 20:01:00 +0100
commit: 775a2e29c3bbcf853432f47d3caa9ff8808807ad (patch)
tree: 614a7481b68dc9b0b628f392ab9bbdc53bbfe447
parent: Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shl... (diff)
parent: dm flakey: introduce "error_writes" feature (diff)
download: linux-775a2e29c3bbcf853432f47d3caa9ff8808807ad.tar.xz
linux-775a2e29c3bbcf853432f47d3caa9ff8808807ad.zip
22 files changed, 461 insertions, 200 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 692171fe9da0..6f15fcea9566 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -21,13 +21,30 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
     /proc/crypto contains supported crypto modes
 
 <key>
-    Key used for encryption. It is encoded as a hexadecimal number.
+    Key used for encryption. It is encoded either as a hexadecimal number
+    or it can be passed as <key_string> prefixed with single colon
+    character (':') for keys residing in kernel keyring service.
     You can only use key sizes that are valid for the selected cipher
     in combination with the selected iv mode.
     Note that for some iv modes the key string can contain additional
     keys (for example IV seed) so the key contains more parts concatenated
     into a single string.
 
+<key_string>
+    The kernel keyring key is identified by string in following format:
+    <key_size>:<key_type>:<key_description>.
+
+<key_size>
+    The encryption key size in bytes. The kernel key payload size must match
+    the value passed in <key_size>.
+
+<key_type>
+    Either 'logon' or 'user' kernel key type.
+
+<key_description>
+    The kernel keyring key description crypt target should look for
+    when loading key of <key_type>.
+
 <keycount>
     Multi-key compatibility mode. You can define <keycount> keys and
     then sectors are encrypted according to their offsets (sector 0 uses key0;
@@ -90,6 +107,12 @@ dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha
 
 [[
 #!/bin/sh
+# Create a crypt device using dmsetup when encryption key is stored in keyring service
+dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
+]]
+
+[[
+#!/bin/sh
 # Create a crypt device using cryptsetup and LUKS header with default cipher
 cryptsetup luksFormat $1
 cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 9bd531aa2279..5e3786fd9ea7 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -242,6 +242,10 @@ recovery.  Here is a fuller description of the individual fields:
 			in RAID1/10 or wrong parity values found in RAID4/5/6.
 			This value is valid only after a "check" of the array
 			is performed.  A healthy array has a 'mismatch_cnt' of 0.
+	<data_offset>   The current data offset to the start of the user data on
+			each component device of a raid set (see the respective
+			raid parameter to support out-of-place reshaping).
+
 
 Message Interface
 -----------------
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 02a5345a44a6..b7767da50c26 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -240,9 +240,17 @@ config DM_BUFIO
 	 as a cache, holding recently-read blocks in memory and performing
 	 delayed writes.
 
+config DM_DEBUG_BLOCK_MANAGER_LOCKING
+       bool "Block manager locking"
+       depends on DM_BUFIO
+       ---help---
+	 Block manager locking can catch various metadata corruption issues.
+
+	 If unsure, say N.
+
 config DM_DEBUG_BLOCK_STACK_TRACING
        bool "Keep stack trace of persistent data block lock holders"
-       depends on STACKTRACE_SUPPORT && DM_BUFIO
+       depends on STACKTRACE_SUPPORT && DM_DEBUG_BLOCK_MANAGER_LOCKING
        select STACKTRACE
        ---help---
 	 Enable this for messages that may help debug problems with the
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 262e75365cc0..84d2f0e4c754 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -820,12 +820,14 @@ enum new_flag {
 static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
 {
 	struct dm_buffer *b;
+	bool tried_noio_alloc = false;
 
 	/*
 	 * dm-bufio is resistant to allocation failures (it just keeps
 	 * one buffer reserved in cases all the allocations fail).
 	 * So set flags to not try too hard:
-	 *	GFP_NOIO: don't recurse into the I/O layer
+	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
+	 *		    mutex and wait ourselves.
 	 *	__GFP_NORETRY: don't retry and rather return failure
 	 *	__GFP_NOMEMALLOC: don't use emergency reserves
 	 *	__GFP_NOWARN: don't print a warning in case of failure
@@ -835,7 +837,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 	 */
 	while (1) {
 		if (dm_bufio_cache_size_latch != 1) {
-			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 			if (b)
 				return b;
 		}
@@ -843,6 +845,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 		if (nf == NF_PREFETCH)
 			return NULL;
 
+		if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
+			dm_bufio_unlock(c);
+			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			dm_bufio_lock(c);
+			if (b)
+				return b;
+			tried_noio_alloc = true;
+		}
+
 		if (!list_empty(&c->reserved_buffers)) {
 			b = list_entry(c->reserved_buffers.next,
 				       struct dm_buffer, lru_list);
@@ -1585,18 +1596,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 static unsigned long
 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
-	struct dm_bufio_client *c;
-	unsigned long count;
-
-	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_FS)
-		dm_bufio_lock(c);
-	else if (!dm_bufio_trylock(c))
-		return 0;
+	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
 
-	count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
-	dm_bufio_unlock(c);
-	return count;
+	return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
 }
 
 /*
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 695577812cf6..624fe4319b24 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -383,7 +383,6 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
 		goto bad;
 
 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
-
 	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
 	if (r < 0)
 		goto bad;
@@ -789,7 +788,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
 {
 	if (cmd->data_block_size != data_block_size) {
-		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+		DMERR("data_block_size (%llu) different from that in metadata (%llu)",
 		      (unsigned long long) data_block_size,
 		      (unsigned long long) cmd->data_block_size);
 		return false;
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index c33f4a6e1d7d..f19c6930a67c 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1361,7 +1361,7 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 
 static unsigned random_level(dm_cblock_t cblock)
 {
-	return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
 }
 
 static int smq_load_mapping(struct dm_cache_policy *p,
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 59b2c50562e4..e04c61e0839e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -989,7 +989,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod
 	enum cache_metadata_mode old_mode = get_cache_mode(cache);
 
 	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
-		DMERR("unable to read needs_check flag, setting failure mode");
+		DMERR("%s: unable to read needs_check flag, setting failure mode.",
+		      cache_device_name(cache));
 		new_mode = CM_FAIL;
 	}
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 68a9eb4f3f36..7c6c57216bf2 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/key.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/mempool.h>
@@ -23,12 +24,14 @@
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
 #include <linux/rbtree.h>
+#include <linux/ctype.h>
 #include <asm/page.h>
 #include <asm/unaligned.h>
 #include <crypto/hash.h>
 #include <crypto/md5.h>
 #include <crypto/algapi.h>
 #include <crypto/skcipher.h>
+#include <keys/user-type.h>
 
 #include <linux/device-mapper.h>
 
@@ -140,8 +143,9 @@ struct crypt_config {
 
 	char *cipher;
 	char *cipher_string;
+	char *key_string;
 
-	struct crypt_iv_operations *iv_gen_ops;
+	const struct crypt_iv_operations *iv_gen_ops;
 	union {
 		struct iv_essiv_private essiv;
 		struct iv_benbi_private benbi;
@@ -758,15 +762,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
 	return r;
 }
 
-static struct crypt_iv_operations crypt_iv_plain_ops = {
+static const struct crypt_iv_operations crypt_iv_plain_ops = {
 	.generator = crypt_iv_plain_gen
 };
 
-static struct crypt_iv_operations crypt_iv_plain64_ops = {
+static const struct crypt_iv_operations crypt_iv_plain64_ops = {
 	.generator = crypt_iv_plain64_gen
 };
 
-static struct crypt_iv_operations crypt_iv_essiv_ops = {
+static const struct crypt_iv_operations crypt_iv_essiv_ops = {
 	.ctr       = crypt_iv_essiv_ctr,
 	.dtr       = crypt_iv_essiv_dtr,
 	.init      = crypt_iv_essiv_init,
@@ -774,17 +778,17 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = {
 	.generator = crypt_iv_essiv_gen
 };
 
-static struct crypt_iv_operations crypt_iv_benbi_ops = {
+static const struct crypt_iv_operations crypt_iv_benbi_ops = {
 	.ctr	   = crypt_iv_benbi_ctr,
 	.dtr	   = crypt_iv_benbi_dtr,
 	.generator = crypt_iv_benbi_gen
 };
 
-static struct crypt_iv_operations crypt_iv_null_ops = {
+static const struct crypt_iv_operations crypt_iv_null_ops = {
 	.generator = crypt_iv_null_gen
 };
 
-static struct crypt_iv_operations crypt_iv_lmk_ops = {
+static const struct crypt_iv_operations crypt_iv_lmk_ops = {
 	.ctr	   = crypt_iv_lmk_ctr,
 	.dtr	   = crypt_iv_lmk_dtr,
 	.init	   = crypt_iv_lmk_init,
@@ -793,7 +797,7 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = {
 	.post	   = crypt_iv_lmk_post
 };
 
-static struct crypt_iv_operations crypt_iv_tcw_ops = {
+static const struct crypt_iv_operations crypt_iv_tcw_ops = {
 	.ctr	   = crypt_iv_tcw_ctr,
 	.dtr	   = crypt_iv_tcw_dtr,
 	.init	   = crypt_iv_tcw_init,
@@ -994,7 +998,6 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
 	unsigned i, len, remaining_size;
 	struct page *page;
-	struct bio_vec *bvec;
 
 retry:
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1019,12 +1022,7 @@ retry:
 
 		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
 
-		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
-		bvec->bv_page = page;
-		bvec->bv_len = len;
-		bvec->bv_offset = 0;
-
-		clone->bi_iter.bi_size += len;
+		bio_add_page(clone, page, len, 0);
 
 		remaining_size -= len;
 	}
@@ -1471,7 +1469,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
 	return 0;
 }
 
-static int crypt_setkey_allcpus(struct crypt_config *cc)
+static int crypt_setkey(struct crypt_config *cc)
 {
 	unsigned subkey_size;
 	int err = 0, i, r;
@@ -1490,25 +1488,157 @@ static int crypt_setkey_allcpus(struct crypt_config *cc)
 	return err;
 }
 
+#ifdef CONFIG_KEYS
+
+static bool contains_whitespace(const char *str)
+{
+	while (*str)
+		if (isspace(*str++))
+			return true;
+	return false;
+}
+
+static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
+{
+	char *new_key_string, *key_desc;
+	int ret;
+	struct key *key;
+	const struct user_key_payload *ukp;
+
+	/*
+	 * Reject key_string with whitespace. dm core currently lacks code for
+	 * proper whitespace escaping in arguments on DM_TABLE_STATUS path.
+	 */
+	if (contains_whitespace(key_string)) {
+		DMERR("whitespace chars not allowed in key string");
+		return -EINVAL;
+	}
+
+	/* look for next ':' separating key_type from key_description */
+	key_desc = strpbrk(key_string, ":");
+	if (!key_desc || key_desc == key_string || !strlen(key_desc + 1))
+		return -EINVAL;
+
+	if (strncmp(key_string, "logon:", key_desc - key_string + 1) &&
+	    strncmp(key_string, "user:", key_desc - key_string + 1))
+		return -EINVAL;
+
+	new_key_string = kstrdup(key_string, GFP_KERNEL);
+	if (!new_key_string)
+		return -ENOMEM;
+
+	key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user,
+			  key_desc + 1, NULL);
+	if (IS_ERR(key)) {
+		kzfree(new_key_string);
+		return PTR_ERR(key);
+	}
+
+	rcu_read_lock();
+
+	ukp = user_key_payload(key);
+	if (!ukp) {
+		rcu_read_unlock();
+		key_put(key);
+		kzfree(new_key_string);
+		return -EKEYREVOKED;
+	}
+
+	if (cc->key_size != ukp->datalen) {
+		rcu_read_unlock();
+		key_put(key);
+		kzfree(new_key_string);
+		return -EINVAL;
+	}
+
+	memcpy(cc->key, ukp->data, cc->key_size);
+
+	rcu_read_unlock();
+	key_put(key);
+
+	/* clear the flag since following operations may invalidate previously valid key */
+	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
+
+	ret = crypt_setkey(cc);
+
+	/* wipe the kernel key payload copy in each case */
+	memset(cc->key, 0, cc->key_size * sizeof(u8));
+
+	if (!ret) {
+		set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
+		kzfree(cc->key_string);
+		cc->key_string = new_key_string;
+	} else
+		kzfree(new_key_string);
+
+	return ret;
+}
+
+static int get_key_size(char **key_string)
+{
+	char *colon, dummy;
+	int ret;
+
+	if (*key_string[0] != ':')
+		return strlen(*key_string) >> 1;
+
+	/* look for next ':' in key string */
+	colon = strpbrk(*key_string + 1, ":");
+	if (!colon)
+		return -EINVAL;
+
+	if (sscanf(*key_string + 1, "%u%c", &ret, &dummy) != 2 || dummy != ':')
+		return -EINVAL;
+
+	*key_string = colon;
+
+	/* remaining key string should be :<logon|user>:<key_desc> */
+
+	return ret;
+}
+
+#else
+
+static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
+{
+	return -EINVAL;
+}
+
+static int get_key_size(char **key_string)
+{
+	return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1;
+}
+
+#endif
+
 static int crypt_set_key(struct crypt_config *cc, char *key)
 {
 	int r = -EINVAL;
 	int key_string_len = strlen(key);
 
-	/* The key size may not be changed. */
-	if (cc->key_size != (key_string_len >> 1))
-		goto out;
-
 	/* Hyphen (which gives a key_size of zero) means there is no key. */
 	if (!cc->key_size && strcmp(key, "-"))
 		goto out;
 
-	if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
+	/* ':' means the key is in kernel keyring, short-circuit normal key processing */
+	if (key[0] == ':') {
+		r = crypt_set_keyring_key(cc, key + 1);
 		goto out;
+	}
 
-	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
+	/* clear the flag since following operations may invalidate previously valid key */
+	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 
-	r = crypt_setkey_allcpus(cc);
+	/* wipe references to any kernel keyring key */
+	kzfree(cc->key_string);
+	cc->key_string = NULL;
+
+	if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
+		goto out;
+
+	r = crypt_setkey(cc);
+	if (!r)
+		set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 
 out:
 	/* Hex key string not needed after here, so wipe it. */
@@ -1521,8 +1651,10 @@ static int crypt_wipe_key(struct crypt_config *cc)
 {
 	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 	memset(&cc->key, 0, cc->key_size * sizeof(u8));
+	kzfree(cc->key_string);
+	cc->key_string = NULL;
 
-	return crypt_setkey_allcpus(cc);
+	return crypt_setkey(cc);
 }
 
 static void crypt_dtr(struct dm_target *ti)
@@ -1558,6 +1690,7 @@ static void crypt_dtr(struct dm_target *ti)
 
 	kzfree(cc->cipher);
 	kzfree(cc->cipher_string);
+	kzfree(cc->key_string);
 
 	/* Must zero key material before freeing */
 	kzfree(cc);
@@ -1726,12 +1859,13 @@ bad_mem:
 
 /*
  * Construct an encryption mapping:
- * <cipher> <key> <iv_offset> <dev_path> <start>
+ * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start>
  */
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct crypt_config *cc;
-	unsigned int key_size, opt_params;
+	int key_size;
+	unsigned int opt_params;
 	unsigned long long tmpll;
 	int ret;
 	size_t iv_size_padding;
@@ -1748,7 +1882,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -EINVAL;
 	}
 
-	key_size = strlen(argv[1]) >> 1;
+	key_size = get_key_size(&argv[1]);
+	if (key_size < 0) {
+		ti->error = "Cannot parse key size";
+		return -EINVAL;
+	}
 
 	cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
 	if (!cc) {
@@ -1955,10 +2093,13 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 	case STATUSTYPE_TABLE:
 		DMEMIT("%s ", cc->cipher_string);
 
-		if (cc->key_size > 0)
-			for (i = 0; i < cc->key_size; i++)
-				DMEMIT("%02x", cc->key[i]);
-		else
+		if (cc->key_size > 0) {
+			if (cc->key_string)
+				DMEMIT(":%u:%s", cc->key_size, cc->key_string);
+			else
+				for (i = 0; i < cc->key_size; i++)
+					DMEMIT("%02x", cc->key[i]);
+		} else
 			DMEMIT("-");
 
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
@@ -2014,7 +2155,7 @@ static void crypt_resume(struct dm_target *ti)
 static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
 {
 	struct crypt_config *cc = ti->private;
-	int ret = -EINVAL;
+	int key_size, ret = -EINVAL;
 
 	if (argc < 2)
 		goto error;
@@ -2025,6 +2166,13 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
 			return -EINVAL;
 		}
 		if (argc == 3 && !strcasecmp(argv[1], "set")) {
+			/* The key size may not be changed. */
+			key_size = get_key_size(&argv[2]);
+			if (key_size < 0 || cc->key_size != key_size) {
+				memset(argv[2], '0', strlen(argv[2]));
+				return -EINVAL;
+			}
+
 			ret = crypt_set_key(cc, argv[2]);
 			if (ret)
 				return ret;
@@ -2068,7 +2216,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 14, 1},
+	.version = {1, 15, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 6a2e8dd44a1b..13305a182611 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -36,7 +36,8 @@ struct flakey_c {
 };
 
 enum feature_flag_bits {
-	DROP_WRITES
+	DROP_WRITES,
+	ERROR_WRITES
 };
 
 struct per_bio_data {
@@ -76,6 +77,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
 			if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
 				ti->error = "Feature drop_writes duplicated";
 				return -EINVAL;
+			} else if (test_bit(ERROR_WRITES, &fc->flags)) {
+				ti->error = "Feature drop_writes conflicts with feature error_writes";
+				return -EINVAL;
+			}
+
+			continue;
+		}
+
+		/*
+		 * error_writes
+		 */
+		if (!strcasecmp(arg_name, "error_writes")) {
+			if (test_and_set_bit(ERROR_WRITES, &fc->flags)) {
+				ti->error = "Feature error_writes duplicated";
+				return -EINVAL;
+
+			} else if (test_bit(DROP_WRITES, &fc->flags)) {
+				ti->error = "Feature error_writes conflicts with feature drop_writes";
+				return -EINVAL;
 			}
 
 			continue;
@@ -135,6 +155,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
 	if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
 		ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
 		return -EINVAL;
+
+	} else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
+		ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+		return -EINVAL;
 	}
 
 	return 0;
@@ -200,11 +224,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (!(fc->up_interval + fc->down_interval)) {
 		ti->error = "Total (up + down) interval is zero";
+		r = -EINVAL;
 		goto bad;
 	}
 
 	if (fc->up_interval + fc->down_interval < fc->up_interval) {
 		ti->error = "Interval overflow";
+		r = -EINVAL;
 		goto bad;
 	}
 
@@ -289,22 +315,27 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 		pb->bio_submitted = true;
 
 		/*
-		 * Error reads if neither corrupt_bio_byte or drop_writes are set.
+		 * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set.
 		 * Otherwise, flakey_end_io() will decide if the reads should be modified.
 		 */
 		if (bio_data_dir(bio) == READ) {
-			if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
+			if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
+			    !test_bit(ERROR_WRITES, &fc->flags))
 				return -EIO;
 			goto map_bio;
 		}
 
 		/*
-		 * Drop writes?
+		 * Drop or error writes?
 		 */
 		if (test_bit(DROP_WRITES, &fc->flags)) {
 			bio_endio(bio);
 			return DM_MAPIO_SUBMITTED;
 		}
+		else if (test_bit(ERROR_WRITES, &fc->flags)) {
+			bio_io_error(bio);
+			return DM_MAPIO_SUBMITTED;
+		}
 
 		/*
 		 * Corrupt matching writes.
@@ -340,10 +371,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 			 */
 			corrupt_bio_data(bio, fc);
 
-		} else if (!test_bit(DROP_WRITES, &fc->flags)) {
+		} else if (!test_bit(DROP_WRITES, &fc->flags) &&
+			   !test_bit(ERROR_WRITES, &fc->flags)) {
 			/*
 			 * Error read during the down_interval if drop_writes
-			 * wasn't configured.
+			 * and error_writes were not configured.
 			 */
 			return -EIO;
 		}
@@ -357,7 +389,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
 {
 	unsigned sz = 0;
 	struct flakey_c *fc = ti->private;
-	unsigned drop_writes;
+	unsigned drop_writes, error_writes;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -370,10 +402,13 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
 		       fc->down_interval);
 
 		drop_writes = test_bit(DROP_WRITES, &fc->flags);
-		DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
+		error_writes = test_bit(ERROR_WRITES, &fc->flags);
+		DMEMIT("%u ", drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5);
 
 		if (drop_writes)
 			DMEMIT("drop_writes ");
+		else if (error_writes)
+			DMEMIT("error_writes ");
 
 		if (fc->corrupt_bio_byte)
 			DMEMIT("corrupt_bio_byte %u %c %u %u ",
@@ -410,7 +445,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
 	.name   = "flakey",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr    = flakey_ctr,
 	.dtr    = flakey_dtr,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0bf1a12e35fe..03940bf36f6c 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -162,7 +162,10 @@ struct dpages {
 			 struct page **p, unsigned long *len, unsigned *offset);
 	void (*next_page)(struct dpages *dp);
 
-	unsigned context_u;
+	union {
+		unsigned context_u;
+		struct bvec_iter context_bi;
+	};
 	void *context_ptr;
 
 	void *vma_invalidate_address;
@@ -204,25 +207,36 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse
 static void bio_get_page(struct dpages *dp, struct page **p,
 			 unsigned long *len, unsigned *offset)
 {
-	struct bio_vec *bvec = dp->context_ptr;
-	*p = bvec->bv_page;
-	*len = bvec->bv_len - dp->context_u;
-	*offset = bvec->bv_offset + dp->context_u;
+	struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
+					     dp->context_bi);
+
+	*p = bvec.bv_page;
+	*len = bvec.bv_len;
+	*offset = bvec.bv_offset;
+
+	/* avoid figuring it out again in bio_next_page() */
+	dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
 }
 
 static void bio_next_page(struct dpages *dp)
 {
-	struct bio_vec *bvec = dp->context_ptr;
-	dp->context_ptr = bvec + 1;
-	dp->context_u = 0;
+	unsigned int len = (unsigned int)dp->context_bi.bi_sector;
+
+	bvec_iter_advance((struct bio_vec *)dp->context_ptr,
+			  &dp->context_bi, len);
 }
 
 static void bio_dp_init(struct dpages *dp, struct bio *bio)
 {
 	dp->get_page = bio_get_page;
 	dp->next_page = bio_next_page;
-	dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
-	dp->context_u = bio->bi_iter.bi_bvec_done;
+
+	/*
+	 * We just use bvec iterator to retrieve pages, so it is ok to
+	 * access the bvec table directly here
+	 */
+	dp->context_ptr = bio->bi_io_vec;
+	dp->context_bi = bio->bi_iter;
 }
 
 /*
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 966eb4b61aed..c72a77048b73 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1697,7 +1697,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 {
 	struct dm_ioctl *dmi;
 	int secure_data;
-	const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data);
+	const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
 
 	if (copy_from_user(param_kernel, user, minimum_data_size))
 		return -EFAULT;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e477af8596e2..6400cffb986d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -372,16 +372,13 @@ static int __pg_init_all_paths(struct multipath *m)
 	return atomic_read(&m->pg_init_in_progress);
 }
 
-static int pg_init_all_paths(struct multipath *m)
+static void pg_init_all_paths(struct multipath *m)
 {
-	int r;
 	unsigned long flags;
 
 	spin_lock_irqsave(&m->lock, flags);
-	r = __pg_init_all_paths(m);
+	__pg_init_all_paths(m);
 	spin_unlock_irqrestore(&m->lock, flags);
-
-	return r;
 }
 
 static void __switch_pg(struct multipath *m, struct priority_group *pg)
@@ -583,16 +580,17 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 		 * .request_fn stacked on blk-mq path(s) and
 		 * blk-mq stacked on blk-mq path(s).
 		 */
-		*__clone = blk_mq_alloc_request(bdev_get_queue(bdev),
-						rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
-		if (IS_ERR(*__clone)) {
-			/* ENOMEM, requeue */
+		clone = blk_mq_alloc_request(bdev_get_queue(bdev),
+					     rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
+		if (IS_ERR(clone)) {
+			/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
 			clear_request_fn_mpio(m, map_context);
 			return r;
 		}
-		(*__clone)->bio = (*__clone)->biotail = NULL;
-		(*__clone)->rq_disk = bdev->bd_disk;
-		(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+		clone->bio = clone->biotail = NULL;
+		clone->rq_disk = bdev->bd_disk;
+		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+		*__clone = clone;
 	}
 
 	if (pgpath->pg->ps.type->start_io)
@@ -852,18 +850,22 @@ retain:
 		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
 		if (attached_handler_name) {
 			/*
+			 * Clear any hw_handler_params associated with a
+			 * handler that isn't already attached.
+			 */
+			if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {
+				kfree(m->hw_handler_params);
+				m->hw_handler_params = NULL;
+			}
+
+			/*
 			 * Reset hw_handler_name to match the attached handler
-			 * and clear any hw_handler_params associated with the
-			 * ignored handler.
 			 *
 			 * NB. This modifies the table line to show the actual
 			 * handler instead of the original table passed in.
 			 */
 			kfree(m->hw_handler_name);
 			m->hw_handler_name = attached_handler_name;
-
-			kfree(m->hw_handler_params);
-			m->hw_handler_params = NULL;
 		}
 	}
 
@@ -1002,6 +1004,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 	}
 
 	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
+	if (!m->hw_handler_name)
+		return -EINVAL;
 
 	if (hw_argc > 1) {
 		char *p;
@@ -1362,7 +1366,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 	char dummy;
 
 	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
-	    (pgnum > m->nr_priority_groups)) {
+	    !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
 		DMWARN("invalid PG number supplied to switch_pg_num");
 		return -EINVAL;
 	}
@@ -1394,7 +1398,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
 	char dummy;
 
 	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
-	    (pgnum > m->nr_priority_groups)) {
+	    !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
 		DMWARN("invalid PG number supplied to bypass_pg");
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 953159d9a825..b8f978e551d7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -160,7 +160,6 @@ struct raid_dev {
 				 CTR_FLAG_DAEMON_SLEEP | \
 				 CTR_FLAG_MIN_RECOVERY_RATE | \
 				 CTR_FLAG_MAX_RECOVERY_RATE | \
-				 CTR_FLAG_MAX_WRITE_BEHIND | \
 				 CTR_FLAG_STRIPE_CACHE | \
 				 CTR_FLAG_REGION_SIZE | \
 				 CTR_FLAG_DELTA_DISKS | \
@@ -171,7 +170,6 @@ struct raid_dev {
 				 CTR_FLAG_DAEMON_SLEEP | \
 				 CTR_FLAG_MIN_RECOVERY_RATE | \
 				 CTR_FLAG_MAX_RECOVERY_RATE | \
-				 CTR_FLAG_MAX_WRITE_BEHIND | \
 				 CTR_FLAG_STRIPE_CACHE | \
 				 CTR_FLAG_REGION_SIZE | \
 				 CTR_FLAG_DELTA_DISKS | \
@@ -2050,16 +2048,17 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 
 	mddev->reshape_position = MaxSector;
 
+	mddev->raid_disks = le32_to_cpu(sb->num_devices);
+	mddev->level = le32_to_cpu(sb->level);
+	mddev->layout = le32_to_cpu(sb->layout);
+	mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
+
 	/*
 	 * Reshaping is supported, e.g. reshape_position is valid
 	 * in superblock and superblock content is authoritative.
 	 */
 	if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
 		/* Superblock is authoritative wrt given raid set layout! */
-		mddev->raid_disks = le32_to_cpu(sb->num_devices);
-		mddev->level = le32_to_cpu(sb->level);
-		mddev->layout = le32_to_cpu(sb->layout);
-		mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
 		mddev->new_level = le32_to_cpu(sb->new_level);
 		mddev->new_layout = le32_to_cpu(sb->new_layout);
 		mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors);
@@ -2087,38 +2086,44 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 		/*
 		 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata
 		 */
-		if (le32_to_cpu(sb->level) != mddev->new_level) {
-			DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)");
-			return -EINVAL;
-		}
-		if (le32_to_cpu(sb->layout) != mddev->new_layout) {
-			DMERR("Reshaping raid sets not yet supported. (raid layout change)");
-			DMERR("	 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout);
-			DMERR("	 Old layout: %s w/ %d copies",
-			      raid10_md_layout_to_format(le32_to_cpu(sb->layout)),
-			      raid10_md_layout_to_copies(le32_to_cpu(sb->layout)));
-			DMERR("	 New layout: %s w/ %d copies",
-			      raid10_md_layout_to_format(mddev->layout),
-			      raid10_md_layout_to_copies(mddev->layout));
-			return -EINVAL;
-		}
-		if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) {
-			DMERR("Reshaping raid sets not yet supported. (stripe sectors change)");
-			return -EINVAL;
-		}
+		struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout);
+		struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
 
-		/* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */
-		if (!rt_is_raid1(rs->raid_type) &&
-		    (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
-			DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)",
-			      sb->num_devices, mddev->raid_disks);
+		if (rs_takeover_requested(rs)) {
+			if (rt_cur && rt_new)
+				DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)",
+				      rt_cur->name, rt_new->name);
+			else
+				DMERR("Takeover raid sets not yet supported by metadata. (raid level change)");
+			return -EINVAL;
+		} else if (rs_reshape_requested(rs)) {
+			DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)");
+			if (mddev->layout != mddev->new_layout) {
+				if (rt_cur && rt_new)
+					DMERR("	 current layout %s vs new layout %s",
+					      rt_cur->name, rt_new->name);
+				else
+					DMERR("	 current layout 0x%X vs new layout 0x%X",
+					      le32_to_cpu(sb->layout), mddev->new_layout);
+			}
+			if (mddev->chunk_sectors != mddev->new_chunk_sectors)
+				DMERR("	 current stripe sectors %u vs new stripe sectors %u",
+				      mddev->chunk_sectors, mddev->new_chunk_sectors);
+			if (rs->delta_disks)
+				DMERR("	 current %u disks vs new %u disks",
+				      mddev->raid_disks, mddev->raid_disks + rs->delta_disks);
+			if (rs_is_raid10(rs)) {
+				DMERR("	 Old layout: %s w/ %u copies",
+				      raid10_md_layout_to_format(mddev->layout),
+				      raid10_md_layout_to_copies(mddev->layout));
+				DMERR("	 New layout: %s w/ %u copies",
+				      raid10_md_layout_to_format(mddev->new_layout),
+				      raid10_md_layout_to_copies(mddev->new_layout));
+			}
 			return -EINVAL;
 		}
 
 		DMINFO("Discovered old metadata format; upgrading to extended metadata format");
-
-		/* Table line is checked vs. authoritative superblock */
-		rs_set_new(rs);
 	}
 
 	if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
@@ -2211,7 +2216,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 				continue;
 
 			if (role != r->raid_disk) {
-				if (__is_raid10_near(mddev->layout)) {
+				if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) {
 					if (mddev->raid_disks % __raid10_near_copies(mddev->layout) ||
 					    rs->raid_disks % rs->raid10_copies) {
 						rs->ti->error =
@@ -2994,6 +2999,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		}
 	}
 
+	/* Disable/enable discard support on raid set. */
+	configure_discard_support(rs);
+
 	mddev_unlock(&rs->md);
 	return 0;
 
@@ -3580,12 +3588,6 @@ static int raid_preresume(struct dm_target *ti)
 	if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
 		rs_update_sbs(rs);
 
-	/*
-	 * Disable/enable discard support on raid set after any
-	 * conversion, because devices can have been added
-	 */
-	configure_discard_support(rs);
-
 	/* Load the bitmap from disk unless raid0 */
 	r = __load_dirty_region_bitmap(rs);
 	if (r)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b2a9e2d161e4..9d7275fb541a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -23,11 +23,7 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
 #define RESERVED_REQUEST_BASED_IOS	256
 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
 
-#ifdef CONFIG_DM_MQ_DEFAULT
-static bool use_blk_mq = true;
-#else
-static bool use_blk_mq = false;
-#endif
+static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
 
 bool dm_use_blk_mq_default(void)
 {
@@ -210,6 +206,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
  */
 static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 {
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
 	atomic_dec(&md->pending[rw]);
 
 	/* nudge anyone waiting on suspend queue */
@@ -222,8 +221,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * back into ->request_fn() could deadlock attempting to grab the
 	 * queue lock again.
 	 */
-	if (!md->queue->mq_ops && run_queue)
-		blk_run_queue_async(md->queue);
+	if (!q->mq_ops && run_queue) {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_run_queue_async(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
@@ -798,7 +800,7 @@ static void dm_old_request_fn(struct request_queue *q)
 			pos = blk_rq_pos(rq);
 
 		if ((dm_old_request_peeked_before_merge_deadline(md) &&
-		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
+		     md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
 		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
 		    (ti->type->busy && ti->type->busy(ti))) {
 			blk_delay_queue(q, 10);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c4b53b332607..0a427de23ed2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -871,7 +871,7 @@ static int dm_table_determine_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
-	bool verify_blk_mq = false;
+	unsigned sq_count = 0, mq_count = 0;
 	struct dm_target *tgt;
 	struct dm_dev_internal *dd;
 	struct list_head *devices = dm_table_get_devices(t);
@@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t)
 
 	BUG_ON(!request_based); /* No targets in this table */
 
-	if (list_empty(devices) && __table_type_request_based(live_md_type)) {
-		/* inherit live MD type */
-		t->type = live_md_type;
-		return 0;
-	}
-
 	/*
 	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
 	 * having a compatible target use dm_table_set_type.
@@ -948,6 +942,19 @@ verify_rq_based:
 		return -EINVAL;
 	}
 
+	if (list_empty(devices)) {
+		int srcu_idx;
+		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
+
+		/* inherit live table's type and all_blk_mq */
+		if (live_table) {
+			t->type = live_table->type;
+			t->all_blk_mq = live_table->all_blk_mq;
+		}
+		dm_put_live_table(t->md, srcu_idx);
+		return 0;
+	}
+
 	/* Non-request-stackable devices can't be used for request-based dm */
 	list_for_each_entry(dd, devices, list) {
 		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
@@ -959,19 +966,19 @@ verify_rq_based:
 		}
 
 		if (q->mq_ops)
-			verify_blk_mq = true;
+			mq_count++;
+		else
+			sq_count++;
 	}
+	if (sq_count && mq_count) {
+		DMERR("table load rejected: not all devices are blk-mq request-stackable");
+		return -EINVAL;
+	}
+	t->all_blk_mq = mq_count > 0;
 
-	if (verify_blk_mq) {
-		/* verify _all_ devices in the table are blk-mq devices */
-		list_for_each_entry(dd, devices, list)
-			if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
-				DMERR("table load rejected: not all devices"
-				      " are blk-mq request-stackable");
-				return -EINVAL;
-			}
-
-		t->all_blk_mq = true;
+	if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) {
+		DMERR("table load rejected: all devices are not blk-mq request-stackable");
+		return -EINVAL;
 	}
 
 	return 0;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 0aba34a7b3b3..7335d8a3fc47 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -868,7 +868,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
 	if (r) {
-		ti->error = "Data device lookup failed";
+		ti->error = "Hash device lookup failed";
 		goto bad;
 	}
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ffa97b742a68..3086da5664f3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1886,9 +1886,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
 
-	spin_lock_irq(q->queue_lock);
-	queue_flag_set(QUEUE_FLAG_DYING, q);
-	spin_unlock_irq(q->queue_lock);
+	blk_set_queue_dying(q);
 
 	if (dm_request_based(md) && md->kworker_task)
 		kthread_flush_worker(&md->kworker);
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index e83047cbb2da..7938cd21fa4c 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -700,13 +700,11 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
 {
 	int r;
 	unsigned i;
-	uint32_t nr_entries;
 	struct dm_btree_value_type *vt = &info->value_type;
 
 	BUG_ON(le32_to_cpu(ab->nr_entries));
 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 
-	nr_entries = le32_to_cpu(ab->nr_entries);
 	for (i = 0; i < new_nr; i++) {
 		r = fn(base + i, element_at(info, ab, i), context);
 		if (r)
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 1e33dd51c21f..a6dde7cab458 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -18,6 +18,8 @@
 
 /*----------------------------------------------------------------*/
 
+#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
+
 /*
  * This is a read/write semaphore with a couple of differences.
  *
@@ -302,6 +304,18 @@ static void report_recursive_bug(dm_block_t b, int r)
 		      (unsigned long long) b);
 }
 
+#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
+
+#define bl_init(x) do { } while (0)
+#define bl_down_read(x) 0
+#define bl_down_read_nonblock(x) 0
+#define bl_up_read(x) do { } while (0)
+#define bl_down_write(x) 0
+#define bl_up_write(x) do { } while (0)
+#define report_recursive_bug(x, y) do { } while (0)
+
+#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -330,8 +344,11 @@ EXPORT_SYMBOL_GPL(dm_block_data);
 
 struct buffer_aux {
 	struct dm_block_validator *validator;
-	struct block_lock lock;
 	int write_locked;
+
+#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
+	struct block_lock lock;
+#endif
 };
 
 static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 306d2e4502c4..4c28608a0c94 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -464,7 +464,8 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 		ll->nr_allocated--;
 		le32_add_cpu(&ie_disk.nr_free, 1);
 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
-	}
+	} else
+		*ev = SM_NONE;
 
 	return ll->save_ie(ll, index, &ie_disk);
 }
@@ -547,7 +548,6 @@ static int metadata_ll_init_index(struct ll_disk *ll)
 	if (r < 0)
 		return r;
 
-	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
 	ll->bitmap_root = dm_block_location(b);
 
 	dm_tm_unlock(ll->tm, b);
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 7e44005595c1..20557e2c60c6 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
 
 	r = sm_ll_new_metadata(&smm->ll, tm);
+	if (!r) {
+		if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
+			nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
+		r = sm_ll_extend(&smm->ll, nr_blocks);
+	}
+	memcpy(&smm->sm, &ops, sizeof(smm->sm));
 	if (r)
 		return r;
 
-	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
-		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
-	r = sm_ll_extend(&smm->ll, nr_blocks);
-	if (r)
-		return r;
-
-	memcpy(&smm->sm, &ops, sizeof(smm->sm));
-
 	/*
 	 * Now we need to update the newly created data structures with the
 	 * allocated blocks that they were built from.
diff --git a/include/uapi/linux/dm-log-userspace.h b/include/uapi/linux/dm-log-userspace.h
index 0fa0d9ef06a5..05e91e14c501 100644
--- a/include/uapi/linux/dm-log-userspace.h
+++ b/include/uapi/linux/dm-log-userspace.h
@@ -7,6 +7,7 @@
 #ifndef __DM_LOG_USERSPACE_H__
 #define __DM_LOG_USERSPACE_H__
 
+#include <linux/types.h>
 #include <linux/dm-ioctl.h> /* For DM_UUID_LEN */
 
 /*
@@ -147,12 +148,12 @@
 
 /*
  * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
- * uint32_t (*get_region_size)(struct dm_dirty_log *log);
+ * __u32 (*get_region_size)(struct dm_dirty_log *log);
  *
  * Payload-to-userspace:
  *	None.
  * Payload-to-kernel:
- *	uint64_t - contains the region size
+ *	__u64 - contains the region size
  *
  * The region size is something that was determined at constructor time.
  * It is returned in the payload area and 'data_size' is set to
@@ -168,11 +169,11 @@
  * int (*is_clean)(struct dm_dirty_log *log, region_t region);
  *
  * Payload-to-userspace:
- *	uint64_t - the region to get clean status on
+ *	__u64 - the region to get clean status on
  * Payload-to-kernel:
- *	int64_t  - 1 if clean, 0 otherwise
+ *	__s64  - 1 if clean, 0 otherwise
  *
- * Payload is sizeof(uint64_t) and contains the region for which the clean
+ * Payload is sizeof(__u64) and contains the region for which the clean
  * status is being made.
  *
  * When the request has been processed, user-space must return the
@@ -187,9 +188,9 @@
  *		  int can_block);
  *
  * Payload-to-userspace:
- *	uint64_t - the region to get sync status on
+ *	__u64 - the region to get sync status on
  * Payload-to-kernel:
- *	int64_t - 1 if in-sync, 0 otherwise
+ *	__s64 - 1 if in-sync, 0 otherwise
  *
  * Exactly the same as 'is_clean' above, except this time asking "has the
  * region been recovered?" vs. "is the region not being modified?"
@@ -203,7 +204,7 @@
  * Payload-to-userspace:
  *	If the 'integrated_flush' directive is present in the constructor
  *	table, the payload is as same as DM_ULOG_MARK_REGION:
- *		uint64_t [] - region(s) to mark
+ *		__u64 [] - region(s) to mark
  *	else
  *		None
  * Payload-to-kernel:
@@ -225,13 +226,13 @@
  * void (*mark_region)(struct dm_dirty_log *log, region_t region);
  *
  * Payload-to-userspace:
- *	uint64_t [] - region(s) to mark
+ *	__u64 [] - region(s) to mark
  * Payload-to-kernel:
  *	None.
  *
  * Incoming payload contains the one or more regions to mark dirty.
  * The number of regions contained in the payload can be determined from
- * 'data_size/sizeof(uint64_t)'.
+ * 'data_size/sizeof(__u64)'.
  *
  * When the request has been processed, user-space must return the
  * dm_ulog_request to the kernel - setting the 'error' field and clearing
@@ -244,13 +245,13 @@
  * void (*clear_region)(struct dm_dirty_log *log, region_t region);
  *
  * Payload-to-userspace:
- *	uint64_t [] - region(s) to clear
+ *	__u64 [] - region(s) to clear
  * Payload-to-kernel:
  *	None.
  *
  * Incoming payload contains the one or more regions to mark clean.
  * The number of regions contained in the payload can be determined from
- * 'data_size/sizeof(uint64_t)'.
+ * 'data_size/sizeof(__u64)'.
  *
  * When the request has been processed, user-space must return the
  * dm_ulog_request to the kernel - setting the 'error' field and clearing
@@ -266,8 +267,8 @@
  *	None.
  * Payload-to-kernel:
  *	{
- *		int64_t i; -- 1 if recovery necessary, 0 otherwise
- *		uint64_t r; -- The region to recover if i=1
+ *		__s64 i; -- 1 if recovery necessary, 0 otherwise
+ *		__u64 r; -- The region to recover if i=1
  *	}
  * 'data_size' should be set appropriately.
  *
@@ -283,8 +284,8 @@
  *
  * Payload-to-userspace:
  *	{
- *		uint64_t - region to set sync state on
- *		int64_t  - 0 if not-in-sync, 1 if in-sync
+ *		__u64 - region to set sync state on
+ *		__s64  - 0 if not-in-sync, 1 if in-sync
  *	}
  * Payload-to-kernel:
  *	None.
@@ -302,7 +303,7 @@
  * Payload-to-userspace:
  *	None.
  * Payload-to-kernel:
- *	uint64_t - the number of in-sync regions
+ *	__u64 - the number of in-sync regions
  *
  * No incoming payload.  Kernel-bound payload contains the number of
  * regions that are in-sync (in a size_t).
@@ -350,11 +351,11 @@
  * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
  *
  * Payload-to-userspace:
- *	uint64_t - region to determine recovery status on
+ *	__u64 - region to determine recovery status on
  * Payload-to-kernel:
  *	{
- *		int64_t is_recovering;  -- 0 if no, 1 if yes
- *		uint64_t in_sync_hint;  -- lowest region still needing resync
+ *		__s64 is_recovering;  -- 0 if no, 1 if yes
+ *		__u64 in_sync_hint;  -- lowest region still needing resync
  *	}
  *
  * When the request has been processed, user-space must return the
@@ -413,16 +414,16 @@ struct dm_ulog_request {
 	 * differentiate between logs that are being swapped and have the
 	 * same 'uuid'.  (Think "live" and "inactive" device-mapper tables.)
 	 */
-	uint64_t luid;
+	__u64 luid;
 	char uuid[DM_UUID_LEN];
 	char padding[3];        /* Padding because DM_UUID_LEN = 129 */
 
-	uint32_t version;       /* See DM_ULOG_REQUEST_VERSION */
-	int32_t error;          /* Used to report back processing errors */
+	__u32 version;       /* See DM_ULOG_REQUEST_VERSION */
+	__s32 error;          /* Used to report back processing errors */
 
-	uint32_t seq;           /* Sequence number for request */
-	uint32_t request_type;  /* DM_ULOG_* defined above */
-	uint32_t data_size;     /* How much data (not including this struct) */
+	__u32 seq;           /* Sequence number for request */
+	__u32 request_type;  /* DM_ULOG_* defined above */
+	__u32 data_size;     /* How much data (not including this struct) */
 
 	char data[0];
 };
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-14 20:01:00 +0100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-14 20:01:00 +0100
commit	775a2e29c3bbcf853432f47d3caa9ff8808807ad (patch)
tree	614a7481b68dc9b0b628f392ab9bbdc53bbfe447
parent	Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shl... (diff)
parent	dm flakey: introduce "error_writes" feature (diff)
download	linux-775a2e29c3bbcf853432f47d3caa9ff8808807ad.tar.xz linux-775a2e29c3bbcf853432f47d3caa9ff8808807ad.zip