summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 23:21:25 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 23:21:25 +0200
commit8374cfe647a1f360be3228b949dd6d753c55c19c (patch)
treed7e5cf67dc5b3ea62901556a2baf2be090a1dc6a
parentMerge tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block (diff)
parentdm: fix dm-raid crash if md_handle_request() splits bio (diff)
downloadlinux-8374cfe647a1f360be3228b949dd6d753c55c19c.tar.xz
linux-8374cfe647a1f360be3228b949dd6d753c55c19c.zip
Merge tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Refactor DM core's mempool allocation so that it clearer by not being split acorss files. - Improve DM core's BLK_STS_DM_REQUEUE and BLK_STS_AGAIN handling. - Optimize DM core's more common bio splitting by eliminating the use of bio cloning with bio_split+bio_chain. Shift that cloning cost to the relatively unlikely dm_io requeue case that only occurs during error handling. Introduces dm_io_rewind() that will clone a bio that reflects the subset of the original bio that must be requeued. - Remove DM core's dm_table_get_num_targets() wrapper and audit all dm_table_get_target() callers. - Fix potential for OOM with DM writecache target by setting a default MAX_WRITEBACK_JOBS (set to 256MiB or 1/16 of total system memory, whichever is smaller). - Fix DM writecache target's stats that are reported through DM-specific table info. - Fix use-after-free crash in dm_sm_register_threshold_callback(). - Refine DM core's Persistent Reservation handling in preparation for broader work Mike Christie is doing to add compatibility with Microsoft Windows Failover Cluster. - Fix various KASAN reported bugs in the DM raid target. - Fix DM raid target crash due to md_handle_request() bio splitting that recurses to block core without properly initializing the bio's bi_dev. - Fix some code comment typos and fix some Documentation formatting. * tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (29 commits) dm: fix dm-raid crash if md_handle_request() splits bio dm raid: fix address sanitizer warning in raid_resume dm raid: fix address sanitizer warning in raid_status dm: Start pr_preempt from the same starting path dm: Fix PR release handling for non All Registrants dm: Start pr_reserve from the same starting path dm: Allow dm_call_pr to be used for path searches dm: return early from dm_pr_call() if DM device is suspended dm thin: fix use-after-free crash in dm_sm_register_threshold_callback dm writecache: count number of blocks discarded, not number of discard bios dm writecache: count number of blocks written, not number of write bios dm writecache: count number of blocks read, not number of read bios dm writecache: return void from functions dm kcopyd: use __GFP_HIGHMEM when allocating pages dm writecache: set a default MAX_WRITEBACK_JOBS Documentation: dm writecache: Render status list as list Documentation: dm writecache: add blank line before optional parameters dm snapshot: fix typo in snapshot_map() comment dm raid: remove redundant "the" in parse_raid_params() comment dm cache: fix typo in 2 comment blocks ...
-rw-r--r--Documentation/admin-guide/device-mapper/writecache.rst18
-rw-r--r--drivers/md/Makefile2
-rw-r--r--drivers/md/dm-cache-metadata.h2
-rw-r--r--drivers/md/dm-cache-target.c2
-rw-r--r--drivers/md/dm-core.h23
-rw-r--r--drivers/md/dm-ima.c5
-rw-r--r--drivers/md/dm-io-rewind.c166
-rw-r--r--drivers/md/dm-ioctl.c6
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/md/dm-raid.c7
-rw-r--r--drivers/md/dm-rq.c1
-rw-r--r--drivers/md/dm-snap.c2
-rw-r--r--drivers/md/dm-table.c318
-rw-r--r--drivers/md/dm-thin-metadata.c7
-rw-r--r--drivers/md/dm-thin.c4
-rw-r--r--drivers/md/dm-verity-target.c7
-rw-r--r--drivers/md/dm-writecache.c43
-rw-r--r--drivers/md/dm-zone.c7
-rw-r--r--drivers/md/dm.c462
-rw-r--r--drivers/md/dm.h4
-rw-r--r--include/linux/device-mapper.h7
-rw-r--r--include/uapi/linux/dm-ioctl.h4
22 files changed, 694 insertions, 405 deletions
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index 10429779a91a..60c16b7fd5ac 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -20,6 +20,7 @@ Constructor parameters:
size)
5. the number of optional parameters (the parameters with an argument
count as two)
+
start_sector n (default: 0)
offset from the start of cache device in 512-byte sectors
high_watermark n (default: 50)
@@ -74,20 +75,21 @@ Constructor parameters:
the origin volume in the last n milliseconds
Status:
+
1. error indicator - 0 if there was no error, otherwise error number
2. the number of blocks
3. the number of free blocks
4. the number of blocks under writeback
-5. the number of read requests
-6. the number of read requests that hit the cache
-7. the number of write requests
-8. the number of write requests that hit uncommitted block
-9. the number of write requests that hit committed block
-10. the number of write requests that bypass the cache
-11. the number of write requests that are allocated in the cache
+5. the number of read blocks
+6. the number of read blocks that hit the cache
+7. the number of write blocks
+8. the number of write blocks that hit uncommitted block
+9. the number of write blocks that hit committed block
+10. the number of write blocks that bypass the cache
+11. the number of write blocks that are allocated in the cache
12. the number of write requests that are blocked on the freelist
13. the number of flush requests
-14. the number of discard requests
+14. the number of discarded blocks
Messages:
flush
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 0454b0885b01..270f694850ec 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -5,7 +5,7 @@
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
- dm-rq.o
+ dm-rq.o dm-io-rewind.o
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-historical-service-time-y += dm-ps-historical-service-time.o
dm-io-affinity-y += dm-ps-io-affinity.o
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 179ed5bf81a3..0905f2c1615e 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -131,7 +131,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd);
* hints will be lost.
*
* The hints are indexed by the cblock, but many policies will not
- * neccessarily have a fast way of accessing efficiently via cblock. So
+ * necessarily have a fast way of accessing efficiently via cblock. So
* rather than querying the policy for each cblock, we let it walk its data
* structures and fill in the hints in whatever order it wishes.
*/
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 28c5de8eca4a..54a8d5c9a44e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2775,7 +2775,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
/*
* The discard block size in the on disk metadata is not
- * neccessarily the same as we're currently using. So we have to
+ * necessarily the same as we're currently using. So we have to
* be careful to only set the discarded attribute if we know it
* covers a complete block of the new size.
*/
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index c954ff91870e..6c6bd24774f2 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
#define DM_RESERVED_MAX_IOS 1024
+struct dm_io;
+
struct dm_kobject_holder {
struct kobject kobj;
struct completion completion;
@@ -91,6 +93,14 @@ struct mapped_device {
spinlock_t deferred_lock;
struct bio_list deferred;
+ /*
+ * requeue work context is needed for cloning one new bio
+ * to represent the dm_io to be requeued, since each
+ * dm_io may point to the original bio from FS.
+ */
+ struct work_struct requeue_work;
+ struct dm_io *requeue_list;
+
void *interface_ptr;
/*
@@ -216,6 +226,13 @@ struct dm_table {
#endif
};
+static inline struct dm_target *dm_table_get_target(struct dm_table *t,
+ unsigned int index)
+{
+ BUG_ON(index >= t->num_targets);
+ return t->targets + index;
+}
+
/*
* One of these is allocated per clone bio.
*/
@@ -230,6 +247,9 @@ struct dm_target_io {
sector_t old_sector;
struct bio clone;
};
+#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
+#define DM_IO_BIO_OFFSET \
+ (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
/*
* dm_target_io flags
@@ -272,7 +292,6 @@ struct dm_io {
atomic_t io_count;
struct mapped_device *md;
- struct bio *split_bio;
/* The three fields represent mapped part of original bio */
struct bio *orig_bio;
unsigned int sector_offset; /* offset to end of orig_bio */
@@ -300,6 +319,8 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
io->flags |= (1U << bit);
}
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs);
+
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 1842d3a958ef..a1bd7cd52b1b 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -208,7 +208,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
if (!target_data_buf)
goto error;
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio))
goto error;
@@ -237,9 +237,6 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
- if (!ti)
- goto error;
-
last_target_measured = 0;
/*
diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
new file mode 100644
index 000000000000..0db53ccb94ba
--- /dev/null
+++ b/drivers/md/dm-io-rewind.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022 Red Hat, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+#include <linux/blk-integrity.h>
+
+#include "dm-core.h"
+
+static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned int bytes)
+{
+ int idx;
+
+ iter->bi_size += bytes;
+ if (bytes <= iter->bi_bvec_done) {
+ iter->bi_bvec_done -= bytes;
+ return true;
+ }
+
+ bytes -= iter->bi_bvec_done;
+ idx = iter->bi_idx - 1;
+
+ while (idx >= 0 && bytes && bytes > bv[idx].bv_len) {
+ bytes -= bv[idx].bv_len;
+ idx--;
+ }
+
+ if (WARN_ONCE(idx < 0 && bytes,
+ "Attempted to rewind iter beyond bvec's boundaries\n")) {
+ iter->bi_size -= bytes;
+ iter->bi_bvec_done = 0;
+ iter->bi_idx = 0;
+ return false;
+ }
+
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = bv[idx].bv_len - bytes;
+ return true;
+}
+
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+/**
+ * dm_bio_integrity_rewind - Rewind integrity vector
+ * @bio: bio whose integrity vector to update
+ * @bytes_done: number of data bytes to rewind
+ *
+ * Description: This function calculates how many integrity bytes the
+ * number of completed data bytes correspond to and rewind the
+ * integrity vector accordingly.
+ */
+static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done)
+{
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
+
+ bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9);
+ dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes);
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+static inline void dm_bio_integrity_rewind(struct bio *bio,
+ unsigned int bytes_done)
+{
+ return;
+}
+
+#endif
+
+#if defined(CONFIG_BLK_INLINE_ENCRYPTION)
+
+/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */
+static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ unsigned int dec)
+{
+ int i;
+
+ for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
+ u64 prev = dun[i];
+
+ dun[i] -= dec;
+ if (dun[i] > prev)
+ dec = 1;
+ else
+ dec = 0;
+ }
+}
+
+static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+
+ dm_bio_crypt_dun_decrement(bc->bc_dun,
+ bytes >> bc->bc_key->data_unit_size_bits);
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ return;
+}
+
+#endif
+
+static inline void dm_bio_rewind_iter(const struct bio *bio,
+ struct bvec_iter *iter, unsigned int bytes)
+{
+ iter->bi_sector -= bytes >> 9;
+
+ /* No advance means no rewind */
+ if (bio_no_advance_iter(bio))
+ iter->bi_size += bytes;
+ else
+ dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
+}
+
+/**
+ * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes.
+ * @bio: bio to rewind
+ * @bytes: how many bytes to rewind
+ *
+ * WARNING:
+ * Caller must ensure that @bio has a fixed end sector, to allow
+ * rewinding from end of bio and restoring its original position.
+ * Caller is also responsibile for restoring bio's size.
+ */
+static void dm_bio_rewind(struct bio *bio, unsigned bytes)
+{
+ if (bio_integrity(bio))
+ dm_bio_integrity_rewind(bio, bytes);
+
+ if (bio_has_crypt_ctx(bio))
+ dm_bio_crypt_rewind(bio, bytes);
+
+ dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
+}
+
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
+{
+ struct bio *orig = io->orig_bio;
+ struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
+ GFP_NOIO, bs);
+ /*
+ * dm_bio_rewind can restore to previous position since the
+ * end sector is fixed for original bio, but we still need
+ * to restore bio's size manually (using io->sectors).
+ */
+ dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
+ orig->bi_iter.bi_size));
+ bio_trim(new_orig, 0, io->sectors);
+
+ bio_chain(new_orig, orig);
+ /*
+ * __bi_remaining was increased (by dm_split_and_process_bio),
+ * so must drop the one added in bio_chain.
+ */
+ atomic_dec(&orig->__bi_remaining);
+ io->orig_bio = new_orig;
+}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 87310fceb0d8..98976aaa9db9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -832,7 +832,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
if (get_disk_ro(disk))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
param->flags |= DM_ACTIVE_PRESENT_FLAG;
@@ -845,7 +845,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (table) {
if (!(dm_table_get_mode(table) & FMODE_WRITE))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
dm_put_live_table(md, srcu_idx);
}
@@ -1248,7 +1248,7 @@ static void retrieve_status(struct dm_table *table,
type = STATUSTYPE_INFO;
/* Get all the target info */
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
size_t l;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9c8f3544e99d..4d3bbbea2e9a 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -219,7 +219,7 @@ static struct page_list *alloc_pl(gfp_t gfp)
if (!pl)
return NULL;
- pl->page = alloc_page(gfp);
+ pl->page = alloc_page(gfp | __GFP_HIGHMEM);
if (!pl->page) {
kfree(pl);
return NULL;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 66486b14ec33..1ec17c32867f 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1369,7 +1369,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
}
rs->md.bitmap_info.daemon_sleep = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) {
- /* Userspace passes new data_offset after having extended the the data image LV */
+ /* Userspace passes new data_offset after having extended the data image LV */
if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
rs->ti->error = "Only one data_offset argument pair allowed";
return -EINVAL;
@@ -3097,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
ti->num_flush_bios = 1;
+ ti->needs_bio_set_dev = true;
/* Restore any requested new layout for conversion decision */
rs_config_restore(rs, &rs_layout);
@@ -3509,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
- struct r5conf *conf = mddev->private;
+ struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
unsigned long recovery;
unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -3819,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
r = &rs->dev[i].rdev;
/* HM FIXME: enhance journal device recovery processing */
if (test_bit(Journal, &r->flags))
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a83b98a8d2a9..4f49bbcce4f1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -43,7 +43,6 @@ unsigned dm_get_reserved_rq_based_ios(void)
return __dm_get_module_param(&reserved_rq_based_ios,
RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
-EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 0d336b5ec571..d1c2f84d27e3 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2026,7 +2026,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/*
* Write to snapshot - higher level takes care of RW/RO
* flags so we should only get this if we are
- * writeable.
+ * writable.
*/
if (bio_data_dir(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index df904b7e95ce..332f96b58252 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -6,6 +6,7 @@
*/
#include "dm-core.h"
+#include "dm-rq.h"
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -174,8 +175,6 @@ static void dm_table_destroy_crypto_profile(struct dm_table *t);
void dm_table_destroy(struct dm_table *t)
{
- unsigned int i;
-
if (!t)
return;
@@ -184,13 +183,13 @@ void dm_table_destroy(struct dm_table *t)
kvfree(t->index[t->depth - 2]);
/* free the targets */
- for (i = 0; i < t->num_targets; i++) {
- struct dm_target *tgt = t->targets + i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- if (tgt->type->dtr)
- tgt->type->dtr(tgt);
+ if (ti->type->dtr)
+ ti->type->dtr(ti);
- dm_put_target_type(tgt->type);
+ dm_put_target_type(ti->type);
}
kvfree(t->highs);
@@ -450,14 +449,14 @@ EXPORT_SYMBOL(dm_put_device);
/*
* Checks to see if the target joins onto the end of the table.
*/
-static int adjoin(struct dm_table *table, struct dm_target *ti)
+static int adjoin(struct dm_table *t, struct dm_target *ti)
{
struct dm_target *prev;
- if (!table->num_targets)
+ if (!t->num_targets)
return !ti->begin;
- prev = &table->targets[table->num_targets - 1];
+ prev = &t->targets[t->num_targets - 1];
return (ti->begin == (prev->begin + prev->len));
}
@@ -564,8 +563,8 @@ int dm_split_args(int *argc, char ***argvp, char *input)
* two or more targets, the size of each piece it gets split into must
* be compatible with the logical_block_size of the target processing it.
*/
-static int validate_hardware_logical_block_alignment(struct dm_table *table,
- struct queue_limits *limits)
+static int validate_hardware_logical_block_alignment(struct dm_table *t,
+ struct queue_limits *limits)
{
/*
* This function uses arithmetic modulo the logical_block_size
@@ -587,13 +586,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
struct dm_target *ti;
struct queue_limits ti_limits;
- unsigned i;
+ unsigned int i;
/*
* Check each entry in the table in turn.
*/
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- ti = dm_table_get_target(table, i);
+ for (i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
blk_set_stacking_limits(&ti_limits);
@@ -621,7 +620,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
if (remaining) {
DMWARN("%s: table line %u (start sect %llu len %llu) "
"not aligned to h/w logical block size %u",
- dm_device_name(table->md), i,
+ dm_device_name(t->md), i,
(unsigned long long) ti->begin,
(unsigned long long) ti->len,
limits->logical_block_size);
@@ -636,7 +635,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
{
int r = -EINVAL, argc;
char **argv;
- struct dm_target *tgt;
+ struct dm_target *ti;
if (t->singleton) {
DMERR("%s: target type %s must appear alone in table",
@@ -646,87 +645,87 @@ int dm_table_add_target(struct dm_table *t, const char *type,
BUG_ON(t->num_targets >= t->num_allocated);
- tgt = t->targets + t->num_targets;
- memset(tgt, 0, sizeof(*tgt));
+ ti = t->targets + t->num_targets;
+ memset(ti, 0, sizeof(*ti));
if (!len) {
DMERR("%s: zero-length target", dm_device_name(t->md));
return -EINVAL;
}
- tgt->type = dm_get_target_type(type);
- if (!tgt->type) {
+ ti->type = dm_get_target_type(type);
+ if (!ti->type) {
DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
return -EINVAL;
}
- if (dm_target_needs_singleton(tgt->type)) {
+ if (dm_target_needs_singleton(ti->type)) {
if (t->num_targets) {
- tgt->error = "singleton target type must appear alone in table";
+ ti->error = "singleton target type must appear alone in table";
goto bad;
}
t->singleton = true;
}
- if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
- tgt->error = "target type may not be included in a read-only table";
+ if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) {
+ ti->error = "target type may not be included in a read-only table";
goto bad;
}
if (t->immutable_target_type) {
- if (t->immutable_target_type != tgt->type) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ if (t->immutable_target_type != ti->type) {
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- } else if (dm_target_is_immutable(tgt->type)) {
+ } else if (dm_target_is_immutable(ti->type)) {
if (t->num_targets) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- t->immutable_target_type = tgt->type;
+ t->immutable_target_type = ti->type;
}
- if (dm_target_has_integrity(tgt->type))
+ if (dm_target_has_integrity(ti->type))
t->integrity_added = 1;
- tgt->table = t;
- tgt->begin = start;
- tgt->len = len;
- tgt->error = "Unknown error";
+ ti->table = t;
+ ti->begin = start;
+ ti->len = len;
+ ti->error = "Unknown error";
/*
* Does this target adjoin the previous one ?
*/
- if (!adjoin(t, tgt)) {
- tgt->error = "Gap in table";
+ if (!adjoin(t, ti)) {
+ ti->error = "Gap in table";
goto bad;
}
r = dm_split_args(&argc, &argv, params);
if (r) {
- tgt->error = "couldn't split parameters";
+ ti->error = "couldn't split parameters";
goto bad;
}
- r = tgt->type->ctr(tgt, argc, argv);
+ r = ti->type->ctr(ti, argc, argv);
kfree(argv);
if (r)
goto bad;
- t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+ t->highs[t->num_targets++] = ti->begin + ti->len - 1;
- if (!tgt->num_discard_bios && tgt->discards_supported)
+ if (!ti->num_discard_bios && ti->discards_supported)
DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);
- if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
+ if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);
return 0;
bad:
- DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r));
- dm_put_target_type(tgt->type);
+ DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
+ dm_put_target_type(ti->type);
return r;
}
@@ -825,14 +824,11 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
}
static bool dm_table_supports_dax(struct dm_table *t,
- iterate_devices_callout_fn iterate_fn)
+ iterate_devices_callout_fn iterate_fn)
{
- struct dm_target *ti;
- unsigned i;
-
/* Ensure that all targets support DAX. */
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->direct_access)
return false;
@@ -860,9 +856,8 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
static int dm_table_determine_type(struct dm_table *t)
{
- unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
- struct dm_target *tgt;
+ struct dm_target *ti;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
@@ -876,11 +871,11 @@ static int dm_table_determine_type(struct dm_table *t)
goto verify_rq_based;
}
- for (i = 0; i < t->num_targets; i++) {
- tgt = t->targets + i;
- if (dm_target_hybrid(tgt))
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
+ if (dm_target_hybrid(ti))
hybrid = 1;
- else if (dm_target_request_based(tgt))
+ else if (dm_target_request_based(ti))
request_based = 1;
else
bio_based = 1;
@@ -942,18 +937,18 @@ verify_rq_based:
return 0;
}
- tgt = dm_table_get_immutable_target(t);
- if (!tgt) {
+ ti = dm_table_get_immutable_target(t);
+ if (!ti) {
DMERR("table load rejected: immutable target is required");
return -EINVAL;
- } else if (tgt->max_io_len) {
+ } else if (ti->max_io_len) {
DMERR("table load rejected: immutable target that splits IO is not supported");
return -EINVAL;
}
/* Non-request-stackable devices can't be used for request-based dm */
- if (!tgt->type->iterate_devices ||
- !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
+ if (!ti->type->iterate_devices ||
+ !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
@@ -983,11 +978,9 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
if (dm_target_is_wildcard(ti->type))
return ti;
}
@@ -1010,32 +1003,56 @@ static bool dm_table_supports_poll(struct dm_table *t);
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
- unsigned per_io_data_size = 0;
- unsigned min_pool_size = 0;
- struct dm_target *ti;
- unsigned i;
- bool poll_supported = false;
+ unsigned int per_io_data_size = 0, front_pad, io_front_pad;
+ unsigned int min_pool_size = 0, pool_size;
+ struct dm_md_mempools *pools;
if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
- if (__table_type_bio_based(type)) {
- for (i = 0; i < t->num_targets; i++) {
- ti = t->targets + i;
- per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
- min_pool_size = max(min_pool_size, ti->num_flush_bios);
- }
- poll_supported = dm_table_supports_poll(t);
+ pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
+ if (!pools)
+ return -ENOMEM;
+
+ if (type == DM_TYPE_REQUEST_BASED) {
+ pool_size = dm_get_reserved_rq_based_ios();
+ front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
+ goto init_bs;
}
- t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
- t->integrity_supported, poll_supported);
- if (!t->mempools)
- return -ENOMEM;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+ per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
+ min_pool_size = max(min_pool_size, ti->num_flush_bios);
+ }
+ pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
+ front_pad = roundup(per_io_data_size,
+ __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
+
+ io_front_pad = roundup(per_io_data_size,
+ __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
+ if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
+ dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
+ goto out_free_pools;
+ if (t->integrity_supported &&
+ bioset_integrity_create(&pools->io_bs, pool_size))
+ goto out_free_pools;
+init_bs:
+ if (bioset_init(&pools->bs, pool_size, front_pad, 0))
+ goto out_free_pools;
+ if (t->integrity_supported &&
+ bioset_integrity_create(&pools->bs, pool_size))
+ goto out_free_pools;
+
+ t->mempools = pools;
return 0;
+
+out_free_pools:
+ dm_free_md_mempools(pools);
+ return -ENOMEM;
}
static int setup_indexes(struct dm_table *t)
@@ -1100,10 +1117,10 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t)
struct list_head *devices = dm_table_get_devices(t);
struct dm_dev_internal *dd = NULL;
struct gendisk *prev_disk = NULL, *template_disk = NULL;
- unsigned i;
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
+
if (!dm_target_passes_integrity(ti->type))
goto no_integrity;
}
@@ -1217,18 +1234,19 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
struct dm_keyslot_evict_args args = { key };
struct dm_table *t;
int srcu_idx;
- int i;
- struct dm_target *ti;
t = dm_get_live_table(md, &srcu_idx);
if (!t)
return 0;
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
if (!ti->type->iterate_devices)
continue;
ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
}
+
dm_put_live_table(md, srcu_idx);
return args.err;
}
@@ -1277,7 +1295,6 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
{
struct dm_crypto_profile *dmcp;
struct blk_crypto_profile *profile;
- struct dm_target *ti;
unsigned int i;
bool empty_profile = true;
@@ -1293,8 +1310,8 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
memset(profile->modes_supported, 0xFF,
sizeof(profile->modes_supported));
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_passes_crypto(ti->type)) {
blk_crypto_intersect_capabilities(profile, NULL);
@@ -1444,14 +1461,6 @@ inline sector_t dm_table_get_size(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_get_size);
-struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
-{
- if (index >= t->num_targets)
- return NULL;
-
- return t->targets + index;
-}
-
/*
* Search the btree for the correct target.
*
@@ -1512,11 +1521,8 @@ static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_any_dev_attr(struct dm_table *t,
iterate_devices_callout_fn func, void *data)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->iterate_devices &&
ti->type->iterate_devices(ti, func, data))
@@ -1538,11 +1544,8 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_poll(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
@@ -1558,18 +1561,15 @@ static bool dm_table_supports_poll(struct dm_table *t)
* Returns false if the result is unknown because a target doesn't
* support iterate_devices.
*/
-bool dm_table_has_no_data_devices(struct dm_table *table)
+bool dm_table_has_no_data_devices(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i, num_devices;
-
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- ti = dm_table_get_target(table, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+ unsigned num_devices = 0;
if (!ti->type->iterate_devices)
return false;
- num_devices = 0;
ti->type->iterate_devices(ti, count_device, &num_devices);
if (num_devices)
return false;
@@ -1597,11 +1597,8 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_zoned_model(struct dm_table *t,
enum blk_zoned_model zoned_model)
{
- struct dm_target *ti;
- unsigned i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (dm_target_supports_zoned_hm(ti->type)) {
if (!ti->type->iterate_devices ||
@@ -1632,16 +1629,16 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *
* zone sectors, if the destination device is a zoned block device, it shall
* have the specified zone_sectors.
*/
-static int validate_hardware_zoned_model(struct dm_table *table,
+static int validate_hardware_zoned_model(struct dm_table *t,
enum blk_zoned_model zoned_model,
unsigned int zone_sectors)
{
if (zoned_model == BLK_ZONED_NONE)
return 0;
- if (!dm_table_supports_zoned_model(table, zoned_model)) {
+ if (!dm_table_supports_zoned_model(t, zoned_model)) {
DMERR("%s: zoned model is not consistent across all devices",
- dm_device_name(table->md));
+ dm_device_name(t->md));
return -EINVAL;
}
@@ -1649,9 +1646,9 @@ static int validate_hardware_zoned_model(struct dm_table *table,
if (!zone_sectors || !is_power_of_2(zone_sectors))
return -EINVAL;
- if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
+ if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
DMERR("%s: zone sectors is not consistent across all zoned devices",
- dm_device_name(table->md));
+ dm_device_name(t->md));
return -EINVAL;
}
@@ -1661,21 +1658,19 @@ static int validate_hardware_zoned_model(struct dm_table *table,
/*
* Establish the new table's queue_limits and validate them.
*/
-int dm_calculate_queue_limits(struct dm_table *table,
+int dm_calculate_queue_limits(struct dm_table *t,
struct queue_limits *limits)
{
- struct dm_target *ti;
struct queue_limits ti_limits;
- unsigned i;
enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
unsigned int zone_sectors = 0;
blk_set_stacking_limits(limits);
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- blk_set_stacking_limits(&ti_limits);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- ti = dm_table_get_target(table, i);
+ blk_set_stacking_limits(&ti_limits);
if (!ti->type->iterate_devices)
goto combine_limits;
@@ -1716,7 +1711,7 @@ combine_limits:
DMWARN("%s: adding target device "
"(start sect %llu len %llu) "
"caused an alignment inconsistency",
- dm_device_name(table->md),
+ dm_device_name(t->md),
(unsigned long long) ti->begin,
(unsigned long long) ti->len);
}
@@ -1736,10 +1731,10 @@ combine_limits:
zoned_model = limits->zoned;
zone_sectors = limits->chunk_sectors;
}
- if (validate_hardware_zoned_model(table, zoned_model, zone_sectors))
+ if (validate_hardware_zoned_model(t, zoned_model, zone_sectors))
return -EINVAL;
- return validate_hardware_logical_block_alignment(table, limits);
+ return validate_hardware_logical_block_alignment(t, limits);
}
/*
@@ -1783,17 +1778,14 @@ static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
{
- struct dm_target *ti;
- unsigned i;
-
/*
* Require at least one underlying device to support flushes.
* t->devices includes internal dm devices such as mirror logs
* so we need to use iterate_devices here, which targets
* supporting flushes must provide.
*/
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_flush_bios)
continue;
@@ -1847,11 +1839,8 @@ static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *
static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_write_zeroes_bios)
return false;
@@ -1874,11 +1863,8 @@ static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_nowait(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i = 0;
-
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_supports_nowait(ti->type))
return false;
@@ -1899,11 +1885,8 @@ static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
static bool dm_table_supports_discards(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_discard_bios)
return false;
@@ -1931,11 +1914,8 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
static bool dm_table_supports_secure_erase(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_secure_erase_bios)
return false;
@@ -2065,11 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
return 0;
}
-unsigned int dm_table_get_num_targets(struct dm_table *t)
-{
- return t->num_targets;
-}
-
struct list_head *dm_table_get_devices(struct dm_table *t)
{
return &t->devices;
@@ -2089,12 +2064,11 @@ enum suspend_mode {
static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
- int i = t->num_targets;
- struct dm_target *ti = t->targets;
-
lockdep_assert_held(&t->md->suspend_lock);
- while (i--) {
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
switch (mode) {
case PRESUSPEND:
if (ti->type->presuspend)
@@ -2109,7 +2083,6 @@ static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
ti->type->postsuspend(ti);
break;
}
- ti++;
}
}
@@ -2139,12 +2112,13 @@ void dm_table_postsuspend_targets(struct dm_table *t)
int dm_table_resume_targets(struct dm_table *t)
{
- int i, r = 0;
+ unsigned int i;
+ int r = 0;
lockdep_assert_held(&t->md->suspend_lock);
for (i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = t->targets + i;
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->preresume)
continue;
@@ -2158,7 +2132,7 @@ int dm_table_resume_targets(struct dm_table *t)
}
for (i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = t->targets + i;
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->resume)
ti->type->resume(ti);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 2db7030aba00..a27395c8621f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 84c083f76673..e76c96c760a9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index d6dbd47492a8..75b66dd67633 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -527,11 +527,10 @@ static int verity_verify_io(struct dm_verity_io *io)
if (v->validated_blocks)
set_bit(cur_block, v->validated_blocks);
continue;
- }
- else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
- cur_block, NULL, &start) == 0)
+ } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ cur_block, NULL, &start) == 0) {
continue;
- else {
+ } else {
if (bio->bi_status) {
/*
* Error correction failed; Just return error
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2b994b3e22a7..1fc161d65673 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -22,7 +22,7 @@
#define HIGH_WATERMARK 50
#define LOW_WATERMARK 45
-#define MAX_WRITEBACK_JOBS 0
+#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
#define ENDIO_LATENCY 16
#define WRITEBACK_LATENCY 64
#define AUTOCOMMIT_BLOCKS_SSD 65536
@@ -1325,8 +1325,8 @@ enum wc_map_op {
WC_MAP_ERROR,
};
-static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
- struct wc_entry *e)
+static void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e)
{
if (e) {
sector_t next_boundary =
@@ -1334,8 +1334,6 @@ static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, stru
if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
dm_accept_partial_bio(bio, next_boundary);
}
-
- return WC_MAP_REMAP_ORIGIN;
}
static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
@@ -1362,14 +1360,16 @@ read_next_block:
map_op = WC_MAP_REMAP;
}
} else {
- map_op = writecache_map_remap_origin(wc, bio, e);
+ writecache_map_remap_origin(wc, bio, e);
+ wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
+ map_op = WC_MAP_REMAP_ORIGIN;
}
return map_op;
}
-static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
- struct wc_entry *e, bool search_used)
+static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e, bool search_used)
{
unsigned bio_size = wc->block_size;
sector_t start_cache_sec = cache_sector(wc, e);
@@ -1409,14 +1409,15 @@ static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct b
bio->bi_iter.bi_sector = start_cache_sec;
dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
+ wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
+
if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
wc->uncommitted_blocks = 0;
queue_work(wc->writeback_wq, &wc->flush_work);
} else {
writecache_schedule_autocommit(wc);
}
-
- return WC_MAP_REMAP;
}
static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
@@ -1426,9 +1427,10 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
do {
bool found_entry = false;
bool search_used = false;
- wc->stats.writes++;
- if (writecache_has_error(wc))
+ if (writecache_has_error(wc)) {
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
return WC_MAP_ERROR;
+ }
e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
if (e) {
if (!writecache_entry_is_committed(wc, e)) {
@@ -1452,9 +1454,11 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
if (unlikely(!e)) {
if (!WC_MODE_PMEM(wc) && !found_entry) {
direct_write:
- wc->stats.writes_around++;
e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- return writecache_map_remap_origin(wc, bio, e);
+ writecache_map_remap_origin(wc, bio, e);
+ wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits;
+ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
+ return WC_MAP_REMAP_ORIGIN;
}
wc->stats.writes_blocked_on_freelist++;
writecache_wait_on_freelist(wc);
@@ -1465,10 +1469,13 @@ direct_write:
wc->uncommitted_blocks++;
wc->stats.writes_allocate++;
bio_copy:
- if (WC_MODE_PMEM(wc))
+ if (WC_MODE_PMEM(wc)) {
bio_copy_block(wc, bio, memory_data(wc, e));
- else
- return writecache_bio_copy_ssd(wc, bio, e, search_used);
+ wc->stats.writes++;
+ } else {
+ writecache_bio_copy_ssd(wc, bio, e, search_used);
+ return WC_MAP_REMAP;
+ }
} while (bio->bi_iter.bi_size);
if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
@@ -1503,7 +1510,7 @@ static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio
static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
{
- wc->stats.discards++;
+ wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits;
if (writecache_has_error(wc))
return WC_MAP_ERROR;
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 4d10f302c62e..3dafc0e8b7a9 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -273,11 +273,8 @@ static int device_not_zone_append_capable(struct dm_target *ti,
static bool dm_table_supports_zone_append(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned int i;
-
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (ti->emulate_zone_append)
return false;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 54c2a23f4e55..99642f69bfa7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -88,10 +88,6 @@ struct clone_info {
bool submit_as_polled:1;
};
-#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
-#define DM_IO_BIO_OFFSET \
- (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
-
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
{
return container_of(clone, struct dm_target_io, clone);
@@ -415,7 +411,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
{
- struct dm_target *tgt;
+ struct dm_target *ti;
struct dm_table *map;
int r;
@@ -426,17 +422,17 @@ retry:
return r;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(map) != 1)
+ if (map->num_targets != 1)
return r;
- tgt = dm_table_get_target(map, 0);
- if (!tgt->type->prepare_ioctl)
+ ti = dm_table_get_target(map, 0);
+ if (!ti->type->prepare_ioctl)
return r;
if (dm_suspended_md(md))
return -EAGAIN;
- r = tgt->type->prepare_ioctl(tgt, bdev);
+ r = ti->type->prepare_ioctl(ti, bdev);
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
dm_put_live_table(md, *srcu_idx);
msleep(10);
@@ -578,9 +574,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct bio *clone;
clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
-
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -594,7 +587,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
- io->split_bio = NULL;
io->md = md;
spin_lock_init(&io->lock);
io->start_time = jiffies;
@@ -614,6 +606,7 @@ static void free_io(struct dm_io *io)
static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
{
+ struct mapped_device *md = ci->io->md;
struct dm_target_io *tio;
struct bio *clone;
@@ -623,14 +616,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
/* alloc_io() already initialized embedded clone */
clone = &tio->clone;
} else {
- struct mapped_device *md = ci->io->md;
-
clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
&md->mempools->bs);
if (!clone)
return NULL;
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
/* REQ_DM_POLL_LIST shouldn't be inherited */
clone->bi_opf &= ~REQ_DM_POLL_LIST;
@@ -646,6 +635,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
tio->len_ptr = len;
tio->old_sector = 0;
+ /* Set default bdev, but target must bio_set_dev() before issuing IO */
+ clone->bi_bdev = md->disk->part0;
+ if (unlikely(ti->needs_bio_set_dev))
+ bio_set_dev(clone, md->disk->part0);
+
if (len) {
clone->bi_iter.bi_size = to_bytes(*len);
if (bio_integrity(clone))
@@ -884,22 +878,63 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
-static void dm_io_complete(struct dm_io *io)
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
{
- blk_status_t io_error;
struct mapped_device *md = io->md;
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
- if (io->status == BLK_STS_DM_REQUEUE) {
+ if (first_stage) {
+ struct dm_io *next = md->requeue_list;
+
+ md->requeue_list = io;
+ io->next = next;
+ } else {
+ bio_list_add_head(&md->deferred, io->orig_bio);
+ }
+}
+
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+ if (first_stage)
+ queue_work(md->wq, &md->requeue_work);
+ else
+ queue_work(md->wq, &md->work);
+}
+
+/*
+ * Return true if the dm_io's original bio is requeued.
+ * io->status is updated with error if requeue disallowed.
+ */
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
+{
+ struct bio *bio = io->orig_bio;
+ bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
+ bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
+ (bio->bi_opf & REQ_POLLED));
+ struct mapped_device *md = io->md;
+ bool requeued = false;
+
+ if (handle_requeue || handle_polled_eagain) {
unsigned long flags;
+
+ if (bio->bi_opf & REQ_POLLED) {
+ /*
+ * Upper layer won't help us poll split bio
+ * (io->orig_bio may only reflect a subset of the
+ * pre-split original) so clear REQ_POLLED.
+ */
+ bio_clear_polled(bio);
+ }
+
/*
- * Target requested pushing back the I/O.
+ * Target requested pushing back the I/O or
+ * polled IO hit BLK_STS_AGAIN.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md) &&
- !WARN_ON_ONCE(dm_is_zone_write(md, bio))) {
- /* NOTE early return due to BLK_STS_DM_REQUEUE below */
- bio_list_add_head(&md->deferred, bio);
+ if ((__noflush_suspending(md) &&
+ !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
+ handle_polled_eagain || first_stage) {
+ dm_requeue_add_io(io, first_stage);
+ requeued = true;
} else {
/*
* noflush suspend was interrupted or this is
@@ -910,6 +945,23 @@ static void dm_io_complete(struct dm_io *io)
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
+ if (requeued)
+ dm_kick_requeue(md, first_stage);
+
+ return requeued;
+}
+
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
+{
+ struct bio *bio = io->orig_bio;
+ struct mapped_device *md = io->md;
+ blk_status_t io_error;
+ bool requeued;
+
+ requeued = dm_handle_requeue(io, first_stage);
+ if (requeued && first_stage)
+ return;
+
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
dm_end_io_acct(io);
@@ -929,23 +981,9 @@ static void dm_io_complete(struct dm_io *io)
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
- if (io_error == BLK_STS_DM_REQUEUE || io_error == BLK_STS_AGAIN) {
- if (bio->bi_opf & REQ_POLLED) {
- /*
- * Upper layer won't help us poll split bio (io->orig_bio
- * may only reflect a subset of the pre-split original)
- * so clear REQ_POLLED in case of requeue.
- */
- bio_clear_polled(bio);
- if (io_error == BLK_STS_AGAIN) {
- /* io_uring doesn't handle BLK_STS_AGAIN (yet) */
- queue_io(md, bio);
- return;
- }
- }
- if (io_error == BLK_STS_DM_REQUEUE)
- return;
- }
+ /* Return early if the original bio was requeued */
+ if (requeued)
+ return;
if (bio_is_flush_with_data(bio)) {
/*
@@ -962,6 +1000,58 @@ static void dm_io_complete(struct dm_io *io)
}
}
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ requeue_work);
+ unsigned long flags;
+ struct dm_io *io;
+
+ /* reuse deferred lock to simplify dm_handle_requeue */
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ io = md->requeue_list;
+ md->requeue_list = NULL;
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+ while (io) {
+ struct dm_io *next = io->next;
+
+ dm_io_rewind(io, &md->queue->bio_split);
+
+ io->next = NULL;
+ __dm_io_complete(io, false);
+ io = next;
+ }
+}
+
+/*
+ * Two staged requeue:
+ *
+ * 1) io->orig_bio points to the real original bio, and the part mapped to
+ * this io must be requeued, instead of other parts of the original bio.
+ *
+ * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+ bool first_requeue;
+
+ /*
+ * Only dm_io that has been split needs two stage requeue, otherwise
+ * we may run into long bio clone chain during suspend and OOM could
+ * be triggered.
+ *
+ * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
+ * also aren't handled via the first stage requeue.
+ */
+ if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
+ first_requeue = true;
+ else
+ first_requeue = false;
+
+ __dm_io_complete(io, first_requeue);
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -1240,6 +1330,7 @@ out:
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_io *io = tio->io;
unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
@@ -1255,8 +1346,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
- dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
- tio->io->sectors = n_sectors;
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = n_sectors;
+ io->sector_offset = bio_sectors(io->orig_bio);
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
@@ -1379,17 +1471,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len)
*/
dm_io_set_flag(io, DM_IO_WAS_SPLIT);
io->sectors = len;
- }
-
- if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&io->md->stats))) {
- /*
- * Save bi_sector in terms of its offset from end of
- * original bio, only needed for DM-stats' benefit.
- * - saved regardless of whether split needed so that
- * dm_accept_partial_bio() doesn't need to.
- */
- io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ io->sector_offset = bio_sectors(ci->bio);
}
}
@@ -1423,11 +1505,11 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, unsigned *len)
+ unsigned int num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
- int ret = 0;
+ unsigned int ret = 0;
switch (num_bios) {
case 0:
@@ -1455,8 +1537,7 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
static void __send_empty_flush(struct clone_info *ci)
{
- unsigned target_nr = 0;
- struct dm_target *ti;
+ struct dm_table *t = ci->map;
struct bio flush_bio;
/*
@@ -1471,8 +1552,9 @@ static void __send_empty_flush(struct clone_info *ci)
ci->sector_count = 0;
ci->io->tio.clone.bi_iter.bi_size = 0;
- while ((ti = dm_table_get_target(ci->map, target_nr++))) {
- int bios;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ unsigned int bios;
+ struct dm_target *ti = dm_table_get_target(t, i);
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1492,7 +1574,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
unsigned num_bios)
{
unsigned len;
- int bios;
+ unsigned int bios;
len = min_t(sector_t, ci->sector_count,
max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
@@ -1691,11 +1773,9 @@ static void dm_split_and_process_bio(struct mapped_device *md,
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
*/
- WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
- io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
- &md->queue->bio_split);
- bio_chain(io->split_bio, bio);
- trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
+ bio_trim(bio, io->sectors, ci.sector_count);
+ trace_block_split(bio, bio->bi_iter.bi_sector);
+ bio_inc_remaining(bio);
submit_bio_noacct(bio);
out:
/*
@@ -1971,9 +2051,11 @@ static struct mapped_device *alloc_dev(int minor)
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
+ INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->requeue_list = NULL;
md->swap_bios = get_swap_bios();
sema_init(&md->swap_bios_semaphore, md->swap_bios);
mutex_init(&md->swap_bios_lock);
@@ -2980,54 +3062,6 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned per_io_data_size, unsigned min_pool_size,
- bool integrity, bool poll)
-{
- struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
- unsigned int pool_size = 0;
- unsigned int front_pad, io_front_pad;
- int ret;
-
- if (!pools)
- return NULL;
-
- switch (type) {
- case DM_TYPE_BIO_BASED:
- case DM_TYPE_DAX_BIO_BASED:
- pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
- front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
- io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? BIOSET_PERCPU_CACHE : 0);
- if (ret)
- goto out;
- if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
- goto out;
- break;
- case DM_TYPE_REQUEST_BASED:
- pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
- front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
- /* per_io_data_size is used for blk-mq pdu at queue allocation */
- break;
- default:
- BUG();
- }
-
- ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
- if (ret)
- goto out;
-
- if (integrity && bioset_integrity_create(&pools->bs, pool_size))
- goto out;
-
- return pools;
-
-out:
- dm_free_md_mempools(pools);
-
- return NULL;
-}
-
void dm_free_md_mempools(struct dm_md_mempools *pools)
{
if (!pools)
@@ -3043,11 +3077,14 @@ struct dm_pr {
u64 old_key;
u64 new_key;
u32 flags;
+ bool abort;
bool fail_early;
+ int ret;
+ enum pr_type type;
};
static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
- void *data)
+ struct dm_pr *pr)
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *table;
@@ -3059,15 +3096,21 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
goto out;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(table) != 1)
+ if (table->num_targets != 1)
goto out;
ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = -EINVAL;
if (!ti->type->iterate_devices)
goto out;
- ret = ti->type->iterate_devices(ti, fn, data);
+ ti->type->iterate_devices(ti, fn, pr);
+ ret = 0;
out:
dm_put_live_table(md, srcu_idx);
return ret;
@@ -3081,10 +3124,24 @@ static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
{
struct dm_pr *pr = data;
const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+ int ret;
+
+ if (!ops || !ops->pr_register) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
- if (!ops || !ops->pr_register)
- return -EOPNOTSUPP;
- return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ if (!ret)
+ return 0;
+
+ if (!pr->ret)
+ pr->ret = ret;
+
+ if (pr->fail_early)
+ return -1;
+
+ return 0;
}
static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
@@ -3095,82 +3152,145 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
.new_key = new_key,
.flags = flags,
.fail_early = true,
+ .ret = 0,
};
int ret;
ret = dm_call_pr(bdev, __dm_pr_register, &pr);
- if (ret && new_key) {
- /* unregister all paths if we failed to register any path */
- pr.old_key = new_key;
- pr.new_key = 0;
- pr.flags = 0;
- pr.fail_early = false;
- dm_call_pr(bdev, __dm_pr_register, &pr);
+ if (ret) {
+ /* Didn't even get to register a path */
+ return ret;
}
+ if (!pr.ret)
+ return 0;
+ ret = pr.ret;
+
+ if (!new_key)
+ return ret;
+
+ /* unregister all paths if we failed to register any path */
+ pr.old_key = new_key;
+ pr.new_key = 0;
+ pr.flags = 0;
+ pr.fail_early = false;
+ (void) dm_call_pr(bdev, __dm_pr_register, &pr);
return ret;
}
+
+static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_reserve) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
+}
+
static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
u32 flags)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .flags = flags,
+ .type = type,
+ .fail_early = false,
+ .ret = 0,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_reserve)
- r = ops->pr_reserve(bdev, key, type, flags);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+/*
+ * If there is a non-All Registrants type of reservation, the release must be
+ * sent down the holding path. For the cases where there is no reservation or
+ * the path is not the holder the device will also return success, so we must
+ * try each path to make sure we got the correct path.
+ */
+static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_release) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
+ if (pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_release, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_release)
- r = ops->pr_release(bdev, key, type);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_preempt) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
+ pr->abort);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
enum pr_type type, bool abort)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .new_key = new_key,
+ .old_key = old_key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_preempt)
- r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
}
static int dm_pr_clear(struct block_device *bdev, u64 key)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a8405ce305a9..5201df03ce40 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,7 +53,6 @@ struct dm_io;
*---------------------------------------------------------------*/
void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
-struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
@@ -218,9 +217,6 @@ void dm_kcopyd_exit(void);
/*
* Mempool operations
*/
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned per_io_data_size, unsigned min_pool_size,
- bool integrity, bool poll);
void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 47a01c7cffdf..04c6acf7faaa 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -373,6 +373,12 @@ struct dm_target {
* after returning DM_MAPIO_SUBMITTED from its map function.
*/
bool accounts_remapped_io:1;
+
+ /*
+ * Set if the target will submit the DM bio without first calling
+ * bio_set_dev(). NOTE: ideally a target should _not_ need this.
+ */
+ bool needs_bio_set_dev:1;
};
void *dm_per_bio_data(struct bio *bio, size_t data_size);
@@ -561,7 +567,6 @@ void dm_sync_table(struct mapped_device *md);
* Queries
*/
sector_t dm_table_get_size(struct dm_table *t);
-unsigned int dm_table_get_num_targets(struct dm_table *t);
fmode_t dm_table_get_mode(struct dm_table *t);
struct mapped_device *dm_table_get_md(struct dm_table *t);
const char *dm_table_device_name(struct dm_table *t);
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 2e9550fef90f..27ad9671f2df 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -286,9 +286,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 46
+#define DM_VERSION_MINOR 47
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2022-02-22)"
+#define DM_VERSION_EXTRA "-ioctl (2022-07-28)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */