summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-11-01 23:55:54 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-11-01 23:55:54 +0100
commit: 0364249d2073c32c5214f02866999ce940bc35a2 (patch)
tree: 8f6fa317669bdc90744481eb5a48f4401f0ca35e /drivers/md
parent: Merge tag 'ata-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlem... (diff)
parent: dm crypt: account large pages in cc->n_allocated_pages (diff)
download: linux-0364249d2073c32c5214f02866999ce940bc35a2.tar.xz
          linux-0364249d2073c32c5214f02866999ce940bc35a2.zip
Merge tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - Update DM core to directly call the map function for both the linear
   and stripe targets; which are provided by DM core

 - Various updates to use new safer string functions

 - Update DM core to respect REQ_NOWAIT flag in normal bios so that
   memory allocations are always attempted with GFP_NOWAIT

 - Add Mikulas Patocka to MAINTAINERS as a DM maintainer!

 - Improve DM delay target's handling of short delays (< 50ms) by using
   a kthread to check expiration of IOs rather than timers and a wq

 - Update the DM error target so that it works with zoned storage. This
   helps xfstests to provide proper IO error handling coverage when
   testing a filesystem with native zoned storage support

 - Update both DM crypt and integrity targets to improve performance by
   using crypto_shash_digest() rather than init+update+final sequence

 - Fix DM crypt target by backfilling missing memory allocation
   accounting for compound pages

* tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm crypt: account large pages in cc->n_allocated_pages
  dm integrity: use crypto_shash_digest() in sb_mac()
  dm crypt: use crypto_shash_digest() in crypt_iv_tcw_whitening()
  dm error: Add support for zoned block devices
  dm delay: for short delays, use kthread instead of timers and wq
  MAINTAINERS: add Mikulas Patocka as a DM maintainer
  dm: respect REQ_NOWAIT flag in normal bios issued to DM
  dm: enhance alloc_multiple_bios() to be more versatile
  dm: make __send_duplicate_bios return unsigned int
  dm log userspace: replace deprecated strncpy with strscpy
  dm ioctl: replace deprecated strncpy with strscpy_pad
  dm crypt: replace open-coded kmemdup_nul
  dm cache metadata: replace deprecated strncpy with strscpy
  dm: shortcut the calls to linear_map and stripe_map
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-cache-metadata.c       6
-rw-r--r--  drivers/md/dm-crypt.c               26
-rw-r--r--  drivers/md/dm-delay.c              103
-rw-r--r--  drivers/md/dm-integrity.c           30
-rw-r--r--  drivers/md/dm-ioctl.c                4
-rw-r--r--  drivers/md/dm-linear.c               2
-rw-r--r--  drivers/md/dm-log-userspace-base.c   2
-rw-r--r--  drivers/md/dm-stripe.c               2
-rw-r--r--  drivers/md/dm-table.c               23
-rw-r--r--  drivers/md/dm-target.c             106
-rw-r--r--  drivers/md/dm.c                    121
-rw-r--r--  drivers/md/dm.h                      2
12 files changed, 320 insertions, 107 deletions
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index acffed750e3e..5a18b80d3666 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -597,7 +597,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
- strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
+ strscpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
@@ -707,7 +707,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
- strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
+ strscpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
@@ -1726,7 +1726,7 @@ static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
(strlen(policy_name) > sizeof(cmd->policy_name) - 1))
return -EINVAL;
- strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+ strscpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
hint_size = dm_cache_policy_get_hint_size(policy);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index be32a290c90a..6de107aff331 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -652,13 +652,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
/* calculate crc32 for every 32bit part and xor it */
desc->tfm = tcw->crc32_tfm;
for (i = 0; i < 4; i++) {
- r = crypto_shash_init(desc);
- if (r)
- goto out;
- r = crypto_shash_update(desc, &buf[i * 4], 4);
- if (r)
- goto out;
- r = crypto_shash_final(desc, &buf[i * 4]);
+ r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]);
if (r)
goto out;
}
@@ -1699,11 +1693,17 @@ retry:
order = min(order, remaining_order);
while (order > 0) {
+ if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) +
+ (1 << order) > dm_crypt_pages_per_client))
+ goto decrease_order;
pages = alloc_pages(gfp_mask
| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP,
order);
- if (likely(pages != NULL))
+ if (likely(pages != NULL)) {
+ percpu_counter_add(&cc->n_allocated_pages, 1 << order);
goto have_pages;
+ }
+decrease_order:
order--;
}
@@ -1741,10 +1741,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */
bio_for_each_folio_all(fi, clone) {
- if (folio_test_large(fi.folio))
+ if (folio_test_large(fi.folio)) {
+ percpu_counter_sub(&cc->n_allocated_pages,
+ 1 << folio_order(fi.folio));
folio_put(fi.folio);
- else
+ } else {
mempool_free(&fi.folio->page, &cc->page_pool);
+ }
}
}
}
@@ -2859,10 +2862,9 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api)
if (!start || !end || ++start > end)
return -EINVAL;
- mac_alg = kzalloc(end - start + 1, GFP_KERNEL);
+ mac_alg = kmemdup_nul(start, end - start, GFP_KERNEL);
if (!mac_alg)
return -ENOMEM;
- strncpy(mac_alg, start, end - start);
mac = crypto_alloc_ahash(mac_alg, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
kfree(mac_alg);
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 7433525e5985..efd510984e25 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
+#include <linux/kthread.h>
#include <linux/device-mapper.h>
@@ -31,6 +32,7 @@ struct delay_c {
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
+ struct task_struct *worker;
atomic_t may_delay;
struct delay_class read;
@@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
mutex_unlock(&dc->timer_lock);
}
+static inline bool delay_is_fast(struct delay_c *dc)
+{
+ return !!dc->worker;
+}
+
+static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
+{
+ struct dm_delay_info *delayed, *next;
+
+ mutex_lock(&delayed_bios_lock);
+ list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ if (flush_all || time_after_eq(jiffies, delayed->expires)) {
+ struct bio *bio = dm_bio_from_per_bio_data(delayed,
+ sizeof(struct dm_delay_info));
+ list_del(&delayed->list);
+ dm_submit_bio_remap(bio, NULL);
+ delayed->class->ops--;
+ }
+ }
+ mutex_unlock(&delayed_bios_lock);
+}
+
+static int flush_worker_fn(void *data)
+{
+ struct delay_c *dc = data;
+
+ while (1) {
+ flush_delayed_bios_fast(dc, false);
+ if (unlikely(list_empty(&dc->delayed_bios))) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else
+ cond_resched();
+ }
+
+ return 0;
+}
+
static void flush_bios(struct bio *bio)
{
struct bio *n;
@@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
}
}
-static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
+static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
unsigned long next_expires = 0;
@@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
- flush_bios(flush_delayed_bios(dc, 0));
+ if (delay_is_fast(dc))
+ flush_delayed_bios_fast(dc, false);
+ else
+ flush_bios(flush_delayed_bios(dc, false));
}
static void delay_dtr(struct dm_target *ti)
@@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
dm_put_device(ti, dc->write.dev);
if (dc->flush.dev)
dm_put_device(ti, dc->flush.dev);
+ if (dc->worker)
+ kthread_stop(dc->worker);
- mutex_destroy(&dc->timer_lock);
+ if (!delay_is_fast(dc))
+ mutex_destroy(&dc->timer_lock);
kfree(dc);
}
@@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct delay_c *dc;
int ret;
+ unsigned int max_delay;
if (argc != 3 && argc != 6 && argc != 9) {
ti->error = "Requires exactly 3, 6 or 9 arguments";
@@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = dc;
- timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
- INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
INIT_LIST_HEAD(&dc->delayed_bios);
- mutex_init(&dc->timer_lock);
atomic_set(&dc->may_delay, 1);
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
if (ret)
goto bad;
+ max_delay = dc->read.delay;
if (argc == 3) {
ret = delay_class_ctr(ti, &dc->write, argv);
@@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
+ max_delay = max(max_delay, dc->write.delay);
+ max_delay = max(max_delay, dc->flush.delay);
goto out;
}
@@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
+ max_delay = max(max_delay, dc->flush.delay);
goto out;
}
ret = delay_class_ctr(ti, &dc->flush, argv + 6);
if (ret)
goto bad;
+ max_delay = max(max_delay, dc->flush.delay);
out:
- dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
- if (!dc->kdelayd_wq) {
- ret = -EINVAL;
- DMERR("Couldn't start kdelayd");
- goto bad;
+ if (max_delay < 50) {
+ /*
+ * In case of small requested delays, use kthread instead of
+ * timers and workqueue to achieve better latency.
+ */
+ dc->worker = kthread_create(&flush_worker_fn, dc,
+ "dm-delay-flush-worker");
+ if (IS_ERR(dc->worker)) {
+ ret = PTR_ERR(dc->worker);
+ goto bad;
+ }
+ } else {
+ timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
+ INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+ mutex_init(&dc->timer_lock);
+ dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+ if (!dc->kdelayd_wq) {
+ ret = -EINVAL;
+ DMERR("Couldn't start kdelayd");
+ goto bad;
+ }
}
ti->num_flush_bios = 1;
@@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
- queue_timeout(dc, expires);
+ if (delay_is_fast(dc))
+ wake_up_process(dc->worker);
+ else
+ queue_timeout(dc, expires);
return DM_MAPIO_SUBMITTED;
}
@@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
struct delay_c *dc = ti->private;
atomic_set(&dc->may_delay, 0);
- del_timer_sync(&dc->delay_timer);
- flush_bios(flush_delayed_bios(dc, 1));
+
+ if (delay_is_fast(dc))
+ flush_delayed_bios_fast(dc, true);
+ else {
+ del_timer_sync(&dc->delay_timer);
+ flush_bios(flush_delayed_bios(dc, true));
+ }
}
static void delay_resume(struct dm_target *ti)
@@ -356,7 +429,7 @@ out:
static struct target_type delay_target = {
.name = "delay",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.features = DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
.ctr = delay_ctr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 97a8d5fc9ebb..e85c688fd91e 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -493,42 +493,32 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
{
SHASH_DESC_ON_STACK(desc, ic->journal_mac);
int r;
- unsigned int size = crypto_shash_digestsize(ic->journal_mac);
+ unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
+ __u8 *sb = (__u8 *)ic->sb;
+ __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
- if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) {
+ if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) {
dm_integrity_io_error(ic, "digest is too long", -EINVAL);
return -EINVAL;
}
desc->tfm = ic->journal_mac;
- r = crypto_shash_init(desc);
- if (unlikely(r < 0)) {
- dm_integrity_io_error(ic, "crypto_shash_init", r);
- return r;
- }
-
- r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size);
- if (unlikely(r < 0)) {
- dm_integrity_io_error(ic, "crypto_shash_update", r);
- return r;
- }
-
if (likely(wr)) {
- r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size);
+ r = crypto_shash_digest(desc, sb, mac - sb, mac);
if (unlikely(r < 0)) {
- dm_integrity_io_error(ic, "crypto_shash_final", r);
+ dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
} else {
- __u8 result[HASH_MAX_DIGESTSIZE];
+ __u8 actual_mac[HASH_MAX_DIGESTSIZE];
- r = crypto_shash_final(desc, result);
+ r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
if (unlikely(r < 0)) {
- dm_integrity_io_error(ic, "crypto_shash_final", r);
+ dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
- if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) {
+ if (memcmp(mac, actual_mac, mac_size)) {
dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
return -EILSEQ;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 21ebb6c39394..e65058e0ed06 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1295,8 +1295,8 @@ static void retrieve_status(struct dm_table *table,
spec->status = 0;
spec->sector_start = ti->begin;
spec->length = ti->len;
- strncpy(spec->target_type, ti->type->name,
- sizeof(spec->target_type) - 1);
+ strscpy_pad(spec->target_type, ti->type->name,
+ sizeof(spec->target_type));
outptr += sizeof(struct dm_target_spec);
remaining = len - (outptr - outbuf);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index f4448d520ee9..2d3e186ca87e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -85,7 +85,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
return lc->start + dm_target_offset(ti, bi_sector);
}
-static int linear_map(struct dm_target *ti, struct bio *bio)
+int linear_map(struct dm_target *ti, struct bio *bio)
{
struct linear_c *lc = ti->private;
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 5aace6ee6d47..7e4f27e86150 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -224,7 +224,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
lc->usr_argc = argc;
- strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+ strscpy(lc->uuid, argv[0], sizeof(lc->uuid));
argc--;
argv++;
spin_lock_init(&lc->flush_lock);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 5e70f5ae394d..16b93ae51d96 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -268,7 +268,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
return DM_MAPIO_SUBMITTED;
}
-static int stripe_map(struct dm_target *ti, struct bio *bio)
+int stripe_map(struct dm_target *ti, struct bio *bio)
{
struct stripe_c *sc = ti->private;
uint32_t stripe;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 37b48f63ae6a..198d38b53322 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -844,7 +844,8 @@ static bool dm_table_supports_dax(struct dm_table *t,
if (!ti->type->direct_access)
return false;
- if (!ti->type->iterate_devices ||
+ if (dm_target_is_wildcard(ti->type) ||
+ !ti->type->iterate_devices ||
ti->type->iterate_devices(ti, iterate_fn, NULL))
return false;
}
@@ -1587,6 +1588,14 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
return blk_queue_zoned_model(q) != *zoned_model;
}
+static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return blk_queue_zoned_model(q) != BLK_ZONED_NONE;
+}
+
/*
* Check the device zoned model based on the target feature flag. If the target
* has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
@@ -1600,6 +1609,18 @@ static bool dm_table_supports_zoned_model(struct dm_table *t,
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
+ /*
+ * For the wildcard target (dm-error), if we do not have a
+ * backing device, we must always return false. If we have a
+ * backing device, the result must depend on checking zoned
+ * model, like for any other target. So for this, check directly
+ * if the target backing device is zoned as we get "false" when
+ * dm-error was set without a backing device.
+ */
+ if (dm_target_is_wildcard(ti->type) &&
+ !ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
+ return false;
+
if (dm_target_supports_zoned_hm(ti->type)) {
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_zoned_model,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 27e2992ff249..0c4efb0bef8a 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -116,9 +116,63 @@ EXPORT_SYMBOL(dm_unregister_target);
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
+struct io_err_c {
+ struct dm_dev *dev;
+ sector_t start;
+};
+
+static int io_err_get_args(struct dm_target *tt, unsigned int argc, char **args)
+{
+ unsigned long long start;
+ struct io_err_c *ioec;
+ char dummy;
+ int ret;
+
+ ioec = kmalloc(sizeof(*ioec), GFP_KERNEL);
+ if (!ioec) {
+ tt->error = "Cannot allocate io_err context";
+ return -ENOMEM;
+ }
+
+ ret = -EINVAL;
+ if (sscanf(args[1], "%llu%c", &start, &dummy) != 1 ||
+ start != (sector_t)start) {
+ tt->error = "Invalid device sector";
+ goto bad;
+ }
+ ioec->start = start;
+
+ ret = dm_get_device(tt, args[0], dm_table_get_mode(tt->table), &ioec->dev);
+ if (ret) {
+ tt->error = "Device lookup failed";
+ goto bad;
+ }
+
+ tt->private = ioec;
+
+ return 0;
+
+bad:
+ kfree(ioec);
+
+ return ret;
+}
+
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
/*
+ * If we have arguments, assume it is the path to the backing
+ * block device and its mapping start sector (same as dm-linear).
+ * In this case, get the device so that we can get its limits.
+ */
+ if (argc == 2) {
+ int ret = io_err_get_args(tt, argc, args);
+
+ if (ret)
+ return ret;
+ }
+
+ /*
* Return error for discards instead of -EOPNOTSUPP
*/
tt->num_discard_bios = 1;
@@ -129,7 +183,12 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
static void io_err_dtr(struct dm_target *tt)
{
- /* empty */
+ struct io_err_c *ioec = tt->private;
+
+ if (ioec) {
+ dm_put_device(tt, ioec->dev);
+ kfree(ioec);
+ }
}
static int io_err_map(struct dm_target *tt, struct bio *bio)
@@ -149,6 +208,45 @@ static void io_err_release_clone_rq(struct request *clone,
{
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static sector_t io_err_map_sector(struct dm_target *ti, sector_t bi_sector)
+{
+ struct io_err_c *ioec = ti->private;
+
+ return ioec->start + dm_target_offset(ti, bi_sector);
+}
+
+static int io_err_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
+{
+ struct io_err_c *ioec = ti->private;
+
+ /*
+ * This should never be called when we do not have a backing device
+ * as that mean the target is not a zoned one.
+ */
+ if (WARN_ON_ONCE(!ioec))
+ return -EIO;
+
+ return dm_report_zones(ioec->dev->bdev, ioec->start,
+ io_err_map_sector(ti, args->next_sector),
+ args, nr_zones);
+}
+#else
+#define io_err_report_zones NULL
+#endif
+
+static int io_err_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct io_err_c *ioec = ti->private;
+
+ if (!ioec)
+ return 0;
+
+ return fn(ti, ioec->dev, ioec->start, ti->len, data);
+}
+
static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
limits->max_discard_sectors = UINT_MAX;
@@ -165,15 +263,17 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
static struct target_type error_target = {
.name = "error",
- .version = {1, 6, 0},
- .features = DM_TARGET_WILDCARD,
+ .version = {1, 7, 0},
+ .features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM,
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
.clone_and_map_rq = io_err_clone_and_map_rq,
.release_clone_rq = io_err_release_clone_rq,
+ .iterate_devices = io_err_iterate_devices,
.io_hints = io_err_io_hints,
.direct_access = io_err_dax_direct_access,
+ .report_zones = io_err_report_zones,
};
int __init dm_target_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f7212e8fc27f..23c32cd1f1d8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -570,13 +570,15 @@ static void dm_end_io_acct(struct dm_io *io)
dm_io_acct(io, true);
}
-static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
+static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t gfp_mask)
{
struct dm_io *io;
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
+ clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs);
+ if (unlikely(!clone))
+ return NULL;
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -1426,9 +1428,16 @@ static void __map_bio(struct bio *clone)
if (unlikely(dm_emulate_zone_append(md)))
r = dm_zone_map_bio(tio);
else
+ goto do_map;
+ } else {
+do_map:
+ if (likely(ti->type->map == linear_map))
+ r = linear_map(ti, clone);
+ else if (ti->type->map == stripe_map)
+ r = stripe_map(ti, clone);
+ else
r = ti->type->map(ti, clone);
- } else
- r = ti->type->map(ti, clone);
+ }
switch (r) {
case DM_MAPIO_SUBMITTED:
@@ -1473,15 +1482,15 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned int num_bios,
- unsigned *len)
+ unsigned *len, gfp_t gfp_flag)
{
struct bio *bio;
- int try;
+ int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
- for (try = 0; try < 2; try++) {
+ for (; try < 2; try++) {
int bio_nr;
- if (try)
+ if (try && num_bios > 1)
mutex_lock(&ci->io->md->table_devices_lock);
for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
bio = alloc_tio(ci, ti, bio_nr, len,
@@ -1491,7 +1500,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
bio_list_add(blist, bio);
}
- if (try)
+ if (try && num_bios > 1)
mutex_unlock(&ci->io->md->table_devices_lock);
if (bio_nr == num_bios)
return;
@@ -1501,34 +1510,31 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
}
-static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned int num_bios, unsigned int *len)
+static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+ unsigned int num_bios, unsigned int *len,
+ gfp_t gfp_flag)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
unsigned int ret = 0;
- switch (num_bios) {
- case 0:
- break;
- case 1:
- if (len)
- setup_split_accounting(ci, *len);
- clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
- __map_bio(clone);
- ret = 1;
- break;
- default:
- if (len)
- setup_split_accounting(ci, *len);
- /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
- alloc_multiple_bios(&blist, ci, ti, num_bios, len);
- while ((clone = bio_list_pop(&blist))) {
+ if (WARN_ON_ONCE(num_bios == 0)) /* num_bios = 0 is a bug in caller */
+ return 0;
+
+ /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
+ if (len)
+ setup_split_accounting(ci, *len);
+
+ /*
+ * Using alloc_multiple_bios(), even if num_bios is 1, to consistently
+ * support allocating using GFP_NOWAIT with GFP_NOIO fallback.
+ */
+ alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
+ while ((clone = bio_list_pop(&blist))) {
+ if (num_bios > 1)
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
- __map_bio(clone);
- ret += 1;
- }
- break;
+ __map_bio(clone);
+ ret += 1;
}
return ret;
@@ -1555,8 +1561,12 @@ static void __send_empty_flush(struct clone_info *ci)
unsigned int bios;
struct dm_target *ti = dm_table_get_target(t, i);
+ if (unlikely(ti->num_flush_bios == 0))
+ continue;
+
atomic_add(ti->num_flush_bios, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
+ bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
+ NULL, GFP_NOWAIT);
atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
}
@@ -1569,10 +1579,9 @@ static void __send_empty_flush(struct clone_info *ci)
bio_uninit(ci->bio);
}
-static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
- unsigned int num_bios,
- unsigned int max_granularity,
- unsigned int max_sectors)
+static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
+ unsigned int num_bios, unsigned int max_granularity,
+ unsigned int max_sectors)
{
unsigned int len, bios;
@@ -1580,7 +1589,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
__max_io_len(ti, ci->sector, max_granularity, max_sectors));
atomic_add(num_bios, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, num_bios, &len);
+ bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
/*
* alloc_io() takes one extra reference for submission, so the
* reference won't reach 0 without the following (+1) subtraction
@@ -1649,8 +1658,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
if (unlikely(!num_bios))
return BLK_STS_NOTSUPP;
- __send_changing_extent_only(ci, ti, num_bios,
- max_granularity, max_sectors);
+ __send_abnormal_io(ci, ti, num_bios, max_granularity, max_sectors);
+
return BLK_STS_OK;
}
@@ -1709,10 +1718,6 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
if (unlikely(!ti))
return BLK_STS_IOERR;
- if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
- unlikely(!dm_target_supports_nowait(ti->type)))
- return BLK_STS_NOTSUPP;
-
if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
@@ -1724,7 +1729,17 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
setup_split_accounting(ci, len);
- clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+
+ if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
+ if (unlikely(!dm_target_supports_nowait(ti->type)))
+ return BLK_STS_NOTSUPP;
+
+ clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT);
+ if (unlikely(!clone))
+ return BLK_STS_AGAIN;
+ } else {
+ clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+ }
__map_bio(clone);
ci->sector += len;
@@ -1733,11 +1748,11 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
return BLK_STS_OK;
}
-static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
+static void init_clone_info(struct clone_info *ci, struct dm_io *io,
struct dm_table *map, struct bio *bio, bool is_abnormal)
{
ci->map = map;
- ci->io = alloc_io(md, bio);
+ ci->io = io;
ci->bio = bio;
ci->is_abnormal_io = is_abnormal;
ci->submit_as_polled = false;
@@ -1772,8 +1787,18 @@ static void dm_split_and_process_bio(struct mapped_device *md,
return;
}
- init_clone_info(&ci, md, map, bio, is_abnormal);
- io = ci.io;
+ /* Only support nowait for normal IO */
+ if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
+ io = alloc_io(md, bio, GFP_NOWAIT);
+ if (unlikely(!io)) {
+ /* Unable to do anything without dm_io. */
+ bio_wouldblock_error(bio);
+ return;
+ }
+ } else {
+ io = alloc_io(md, bio, GFP_NOIO);
+ }
+ init_clone_info(&ci, io, map, bio, is_abnormal);
if (bio->bi_opf & REQ_PREFLUSH) {
__send_empty_flush(&ci);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f682295af91f..7f1acbf6bd9e 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -188,9 +188,11 @@ void dm_kobject_release(struct kobject *kobj);
/*
* Targets for linear and striped mappings
*/
+int linear_map(struct dm_target *ti, struct bio *bio);
int dm_linear_init(void);
void dm_linear_exit(void);
+int stripe_map(struct dm_target *ti, struct bio *bio);
int dm_stripe_init(void);
void dm_stripe_exit(void);