diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-10 02:40:27 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-10 02:40:27 +0100 |
commit | 6cdc577a18a616c331f57e268c97466171cfc45f (patch) | |
tree | 95fba928918a06b32ed59efb2de7e8ba1513e78b /drivers/md | |
parent | Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi (diff) | |
parent | dm integrity: limit the rate of error messages (diff) | |
download | linux-6cdc577a18a616c331f57e268c97466171cfc45f.tar.xz linux-6cdc577a18a616c331f57e268c97466171cfc45f.zip |
Merge tag 'for-5.1/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Update bio-based DM core to always call blk_queue_split() and update
DM targets to properly advertise discard limits that
blk_queue_split() looks at when dtermining to split discard. Whereby
allowing DM core's own 'split_discard_bios' to be removed.
- Improve DM cache target to provide support for discard passdown to
the origin device.
- Introduce support to directly boot to a DM mapped device from init by
using dm-mod.create= module param. This eliminates the need for an
elaborate initramfs that is otherwise needed to create DM devices.
This feature's implementation has been worked on for quite some time
(got up to v12) and is of particular interest to Android and other
more embedded platforms (e.g. ARM).
- Rate limit errors from the DM integrity target that were identified
as the cause for recent NMI hangs due to console limitations.
- Add sanity checks for user input to thin-pool and external snapshot
creation.
- Remove some unused leftover kmem caches from when old .request_fn
request-based support was removed.
- Various small cleanups and fixes to targets (e.g. typos, needless
unlikely() annotations, use struct_size(), remove needless
.direct_access method from dm-snapshot)
* tag 'for-5.1/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm integrity: limit the rate of error messages
dm snapshot: don't define direct_access if we don't support it
dm cache: add support for discard passdown to the origin device
dm writecache: fix typo in name for writeback_wq
dm: add support to directly boot to a mapped device
dm thin: add sanity checks to thin-pool and external snapshot creation
dm block manager: remove redundant unlikely annotation
dm verity fec: remove redundant unlikely annotation
dm integrity: remove redundant unlikely annotation
dm: always call blk_queue_split() in dm_process_bio()
dm: fix to_sector() for 32bit
dm switch: use struct_size() in kzalloc()
dm: remove unused _rq_tio_cache and _rq_cache
dm: eliminate 'split_discard_bios' flag from DM target interface
dm: update dm_process_bio() to split bio if in ->make_request_fn()
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 12 | ||||
-rw-r--r-- | drivers/md/Makefile | 4 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 127 | ||||
-rw-r--r-- | drivers/md/dm-init.c | 303 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 10 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 103 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 14 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 16 | ||||
-rw-r--r-- | drivers/md/dm-rq.h | 16 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 8 | ||||
-rw-r--r-- | drivers/md/dm-switch.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 14 | ||||
-rw-r--r-- | drivers/md/dm-verity-fec.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-writecache.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-zoned-target.c | 1 | ||||
-rw-r--r-- | drivers/md/dm.c | 139 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 8 |
17 files changed, 647 insertions, 139 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3db222509e44..2557f198e175 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -436,6 +436,18 @@ config DM_DELAY If unsure, say N. +config DM_INIT + bool "DM \"dm-mod.create=\" parameter support" + depends on BLK_DEV_DM=y + ---help--- + Enable "dm-mod.create=" parameter to create mapped devices at init time. + This option is useful to allow mounting rootfs without requiring an + initramfs. + See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." + format. + + If unsure, say N. + config DM_UEVENT bool "DM uevents" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 822f4e8753bc..a52b703e588e 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -69,6 +69,10 @@ obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o obj-$(CONFIG_DM_ZONED) += dm-zoned.o obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o +ifeq ($(CONFIG_DM_INIT),y) +dm-mod-objs += dm-init.o +endif + ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b29a8327eed1..d249cf8ac277 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -353,6 +353,7 @@ struct cache_features { enum cache_metadata_mode mode; enum cache_io_mode io_mode; unsigned metadata_version; + bool discard_passdown:1; }; struct cache_stats { @@ -1899,7 +1900,11 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio) b = to_dblock(from_dblock(b) + 1); } - bio_endio(bio); + if (cache->features.discard_passdown) { + remap_to_origin(cache, bio); + generic_make_request(bio); + } else + bio_endio(bio); return false; } @@ -2233,13 +2238,14 @@ static void init_features(struct cache_features *cf) cf->mode = CM_WRITE; cf->io_mode = CM_IO_WRITEBACK; cf->metadata_version = 1; + cf->discard_passdown = true; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { static const struct dm_arg _args[] = { - {0, 2, "Invalid number of cache feature arguments"}, + {0, 3, "Invalid number of cache feature arguments"}, }; int r, mode_ctr = 0; @@ -2274,6 +2280,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; + else if (!strcasecmp(arg, "no_discard_passdown")) + cf->discard_passdown = false; + else { *error = "Unrecognised cache feature requested"; return -EINVAL; @@ -2496,7 +2505,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; - ti->split_discard_bios = false; ti->per_io_data_size = sizeof(struct per_bio_data); @@ -3120,6 +3128,39 @@ static void cache_resume(struct dm_target *ti) do_waker(&cache->waker.work); } +static void emit_flags(struct cache *cache, char *result, + unsigned maxlen, ssize_t *sz_ptr) +{ + ssize_t sz = *sz_ptr; + struct cache_features *cf = &cache->features; + unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1; + + DMEMIT("%u ", count); + + if (cf->metadata_version == 2) + DMEMIT("metadata2 "); + + if (writethrough_mode(cache)) + DMEMIT("writethrough "); + + else if (passthrough_mode(cache)) + DMEMIT("passthrough "); + + else if (writeback_mode(cache)) + DMEMIT("writeback "); + + else { + DMEMIT("unknown "); + DMERR("%s: internal error: unknown io mode: %d", + cache_device_name(cache), (int) cf->io_mode); + } + + if (!cf->discard_passdown) + DMEMIT("no_discard_passdown "); + + *sz_ptr = sz; +} + /* * Status format: * @@ -3186,25 +3227,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.promotion), (unsigned long) atomic_read(&cache->nr_dirty)); - if (cache->features.metadata_version == 2) - DMEMIT("2 metadata2 "); - else - DMEMIT("1 "); - - if (writethrough_mode(cache)) - DMEMIT("writethrough "); - - else if (passthrough_mode(cache)) - DMEMIT("passthrough "); - - else if (writeback_mode(cache)) - DMEMIT("writeback "); - - else { - DMERR("%s: internal error: unknown io mode: %d", - cache_device_name(cache), (int) cache->features.io_mode); - goto err; - } + emit_flags(cache, result, maxlen, &sz); DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); @@ -3433,14 +3456,62 @@ static int cache_iterate_devices(struct dm_target *ti, return r; } +static bool origin_dev_supports_discard(struct block_device *origin_bdev) +{ + struct request_queue *q = bdev_get_queue(origin_bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the origin device + * supports discards. Disable discard_passdown if not. + */ +static void disable_passdown_if_not_supported(struct cache *cache) +{ + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!cache->features.discard_passdown) + return; + + if (!origin_dev_supports_discard(origin_bdev)) + reason = "discard unsupported"; + + else if (origin_limits->max_discard_sectors < cache->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + if (reason) { + DMWARN("Origin device (%s) %s: Disabling discard passdown.", + bdevname(origin_bdev, buf), reason); + cache->features.discard_passdown = false; + } +} + static void set_discard_limits(struct cache *cache, struct queue_limits *limits) { + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + + if (!cache->features.discard_passdown) { + /* No passdown is done so setting own virtual limits */ + limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + return; + } + /* - * FIXME: these limits may be incompatible with the cache device + * cache_iterate_devices() is stacking both origin and fast device limits + * but discards aren't passed to fast device, so inherit origin's limits. */ - limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, - cache->origin_sectors); - limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + limits->max_discard_sectors = origin_limits->max_discard_sectors; + limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors; + limits->discard_granularity = origin_limits->discard_granularity; + limits->discard_alignment = origin_limits->discard_alignment; + limits->discard_misaligned = origin_limits->discard_misaligned; } static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -3457,6 +3528,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); } + + disable_passdown_if_not_supported(cache); set_discard_limits(cache, limits); } @@ -3464,7 +3537,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {2, 0, 0}, + .version = {2, 1, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c new file mode 100644 index 000000000000..b53f30f16b4d --- /dev/null +++ b/drivers/md/dm-init.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * dm-init.c + * Copyright (C) 2017 The Chromium OS Authors <chromium-os-dev@chromium.org> + * + * This file is released under the GPLv2. + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/device-mapper.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/moduleparam.h> + +#define DM_MSG_PREFIX "init" +#define DM_MAX_DEVICES 256 +#define DM_MAX_TARGETS 256 +#define DM_MAX_STR_SIZE 4096 + +static char *create; + +/* + * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+] + * Table format: <start_sector> <num_sectors> <target_type> <target_args> + * + * See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." format + * details. + */ + +struct dm_device { + struct dm_ioctl dmi; + struct dm_target_spec *table[DM_MAX_TARGETS]; + char *target_args_array[DM_MAX_TARGETS]; + struct list_head list; +}; + +const char *dm_allowed_targets[] __initconst = { + "crypt", + "delay", + "linear", + "snapshot-origin", + "striped", + "verity", +}; + +static int __init dm_verify_target_type(const char *target) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dm_allowed_targets); i++) { + if (!strcmp(dm_allowed_targets[i], target)) + return 0; + } + return -EINVAL; +} + +static void __init dm_setup_cleanup(struct list_head *devices) +{ + struct dm_device *dev, *tmp; + unsigned int i; + + list_for_each_entry_safe(dev, tmp, devices, list) { + list_del(&dev->list); + for (i = 0; i < dev->dmi.target_count; i++) { + kfree(dev->table[i]); + kfree(dev->target_args_array[i]); + } + kfree(dev); + } +} + +/** + * str_field_delimit - delimit a string based on a separator char. + * @str: the pointer to the string to delimit. + * @separator: char that delimits the field + * + * Find a @separator and replace it by '\0'. + * Remove leading and trailing spaces. + * Return the remainder string after the @separator. + */ +static char __init *str_field_delimit(char **str, char separator) +{ + char *s; + + /* TODO: add support for escaped characters */ + *str = skip_spaces(*str); + s = strchr(*str, separator); + /* Delimit the field and remove trailing spaces */ + if (s) + *s = '\0'; + *str = strim(*str); + return s ? ++s : NULL; +} + +/** + * dm_parse_table_entry - parse a table entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * <start_sector> <num_sectors> <target_type> <target_args>[, ...] + * + * Return the remainder string after the table entry, i.e, after the comma which + * delimits the entry or NULL if reached the end of the string. + */ +static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) +{ + const unsigned int n = dev->dmi.target_count - 1; + struct dm_target_spec *sp; + unsigned int i; + /* fields: */ + char *field[4]; + char *next; + + field[0] = str; + /* Delimit first 3 fields that are separated by space */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i + 1] = str_field_delimit(&field[i], ' '); + if (!field[i + 1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be terminated by comma */ + next = str_field_delimit(&field[i], ','); + + sp = kzalloc(sizeof(*sp), GFP_KERNEL); + if (!sp) + return ERR_PTR(-ENOMEM); + dev->table[n] = sp; + + /* start_sector */ + if (kstrtoull(field[0], 0, &sp->sector_start)) + return ERR_PTR(-EINVAL); + /* num_sector */ + if (kstrtoull(field[1], 0, &sp->length)) + return ERR_PTR(-EINVAL); + /* target_type */ + strscpy(sp->target_type, field[2], sizeof(sp->target_type)); + if (dm_verify_target_type(sp->target_type)) { + DMERR("invalid type \"%s\"", sp->target_type); + return ERR_PTR(-EINVAL); + } + /* target_args */ + dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, + DM_MAX_STR_SIZE); + if (!dev->target_args_array[n]) + return ERR_PTR(-ENOMEM); + + return next; +} + +/** + * dm_parse_table - parse "dm-mod.create=" table field + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * <table>[,<table>+] + */ +static int __init dm_parse_table(struct dm_device *dev, char *str) +{ + char *table_entry = str; + + while (table_entry) { + DMDEBUG("parsing table \"%s\"", str); + if (++dev->dmi.target_count >= DM_MAX_TARGETS) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + table_entry = dm_parse_table_entry(dev, table_entry); + if (IS_ERR(table_entry)) { + DMERR("couldn't parse table"); + return PTR_ERR(table_entry); + } + } + + return 0; +} + +/** + * dm_parse_device_entry - parse a device entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * name,uuid,minor,flags,table[; ...] + * + * Return the remainder string after the table entry, i.e, after the semi-colon + * which delimits the entry or NULL if reached the end of the string. + */ +static char __init *dm_parse_device_entry(struct dm_device *dev, char *str) +{ + /* There are 5 fields: name,uuid,minor,flags,table; */ + char *field[5]; + unsigned int i; + char *next; + + field[0] = str; + /* Delimit first 4 fields that are separated by comma */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i+1] = str_field_delimit(&field[i], ','); + if (!field[i+1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be delimited by semi-colon */ + next = str_field_delimit(&field[i], ';'); + + /* name */ + strscpy(dev->dmi.name, field[0], sizeof(dev->dmi.name)); + /* uuid */ + strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid)); + /* minor */ + if (strlen(field[2])) { + if (kstrtoull(field[2], 0, &dev->dmi.dev)) + return ERR_PTR(-EINVAL); + dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG; + } + /* flags */ + if (!strcmp(field[3], "ro")) + dev->dmi.flags |= DM_READONLY_FLAG; + else if (strcmp(field[3], "rw")) + return ERR_PTR(-EINVAL); + /* table */ + if (dm_parse_table(dev, field[4])) + return ERR_PTR(-EINVAL); + + return next; +} + +/** + * dm_parse_devices - parse "dm-mod.create=" argument + * @devices: list of struct dm_device to store the parsed information. + * @str: the pointer to a string with the format: + * <device>[;<device>+] + */ +static int __init dm_parse_devices(struct list_head *devices, char *str) +{ + unsigned long ndev = 0; + struct dm_device *dev; + char *device = str; + + DMDEBUG("parsing \"%s\"", str); + while (device) { + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + list_add_tail(&dev->list, devices); + + if (++ndev >= DM_MAX_DEVICES) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + + device = dm_parse_device_entry(dev, device); + if (IS_ERR(device)) { + DMERR("couldn't parse device"); + return PTR_ERR(device); + } + } + + return 0; +} + +/** + * dm_init_init - parse "dm-mod.create=" argument and configure drivers + */ +static int __init dm_init_init(void) +{ + struct dm_device *dev; + LIST_HEAD(devices); + char *str; + int r; + + if (!create) + return 0; + + if (strlen(create) >= DM_MAX_STR_SIZE) { + DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); + return -EINVAL; + } + str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + if (!str) + return -ENOMEM; + + r = dm_parse_devices(&devices, str); + if (r) + goto out; + + DMINFO("waiting for all devices to be available before creating mapped devices\n"); + wait_for_device_probe(); + + list_for_each_entry(dev, &devices, list) { + if (dm_early_create(&dev->dmi, dev->table, + dev->target_args_array)) + break; + } +out: + kfree(str); + dm_setup_cleanup(&devices); + return r; +} + +late_initcall(dm_init_init); + +module_param(create, charp, 0); +MODULE_PARM_DESC(create, "Create a mapped device in early boot"); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 457200ca6287..d57d997a52c8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1122,7 +1122,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se return r; data = dm_bufio_read(ic->bufio, *metadata_block, &b); - if (unlikely(IS_ERR(data))) + if (IS_ERR(data)) return PTR_ERR(data); to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); @@ -1368,8 +1368,8 @@ again: checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - DMERR("Checksum failed at sector 0x%llx", - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); + DMERR_LIMIT("Checksum failed at sector 0x%llx", + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); } @@ -1561,8 +1561,8 @@ retry_kmap: integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { - DMERR("Checksum failed when reading from journal, at sector 0x%llx", - (unsigned long long)logical_sector); + DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", + (unsigned long long)logical_sector); } } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index f666778ad237..c740153b4e52 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -2018,3 +2018,106 @@ out: return r; } + + +/** + * dm_early_create - create a mapped device in early boot. + * + * @dmi: Contains main information of the device mapping to be created. + * @spec_array: array of pointers to struct dm_target_spec. Describes the + * mapping table of the device. + * @target_params_array: array of strings with the parameters to a specific + * target. + * + * Instead of having the struct dm_target_spec and the parameters for every + * target embedded at the end of struct dm_ioctl (as performed in a normal + * ioctl), pass them as arguments, so the caller doesn't need to serialize them. + * The size of the spec_array and target_params_array is given by + * @dmi->target_count. + * This function is supposed to be called in early boot, so locking mechanisms + * to protect against concurrent loads are not required. + */ +int __init dm_early_create(struct dm_ioctl *dmi, + struct dm_target_spec **spec_array, + char **target_params_array) +{ + int r, m = DM_ANY_MINOR; + struct dm_table *t, *old_map; + struct mapped_device *md; + unsigned int i; + + if (!dmi->target_count) + return -EINVAL; + + r = check_name(dmi->name); + if (r) + return r; + + if (dmi->flags & DM_PERSISTENT_DEV_FLAG) + m = MINOR(huge_decode_dev(dmi->dev)); + + /* alloc dm device */ + r = dm_create(m, &md); + if (r) + return r; + + /* hash insert */ + r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md); + if (r) + goto err_destroy_dm; + + /* alloc table */ + r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md); + if (r) + goto err_destroy_dm; + + /* add targets */ + for (i = 0; i < dmi->target_count; i++) { + r = dm_table_add_target(t, spec_array[i]->target_type, + (sector_t) spec_array[i]->sector_start, + (sector_t) spec_array[i]->length, + target_params_array[i]); + if (r) { + DMWARN("error adding target to table"); + goto err_destroy_table; + } + } + + /* finish table */ + r = dm_table_complete(t); + if (r) + goto err_destroy_table; + + md->type = dm_table_get_type(t); + /* setup md->queue to reflect md's type (may block) */ + r = dm_setup_md_queue(md, t); + if (r) { + DMWARN("unable to set up device queue for new table."); + goto err_destroy_table; + } + + /* Set new map */ + dm_suspend(md, 0); + old_map = dm_swap_table(md, t); + if (IS_ERR(old_map)) { + r = PTR_ERR(old_map); + goto err_destroy_table; + } + set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG)); + + /* resume device */ + r = dm_resume(md); + if (r) + goto err_destroy_table; + + DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name); + dm_put(md); + return 0; + +err_destroy_table: + dm_table_destroy(t); +err_destroy_dm: + dm_put(md); + dm_destroy(md); + return r; +} diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index adcfe8ae10aa..9fdef6897316 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs) } } - /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. - */ - ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs)); ti->num_discard_bios = 1; } @@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs)); + + /* + * RAID1 and RAID10 personalities require bio splitting, + * RAID0/4/5/6 don't and process large discard bios properly. + */ + if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + limits->discard_granularity = chunk_size; + limits->max_discard_sectors = chunk_size; + } } static void raid_postsuspend(struct dm_target *ti) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index b2c03c079a8d..09773636602d 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -12,6 +12,22 @@ #define DM_MSG_PREFIX "core-rq" +/* + * One of these is allocated per request. + */ +struct dm_rq_target_io { + struct mapped_device *md; + struct dm_target *ti; + struct request *orig, *clone; + struct kthread_work work; + blk_status_t error; + union map_info info; + struct dm_stats_aux stats_aux; + unsigned long duration_jiffies; + unsigned n_sectors; + unsigned completed; +}; + #define DM_MQ_NR_HW_QUEUES 1 #define DM_MQ_QUEUE_DEPTH 2048 static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index b39245545229..1eea0da641db 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -17,22 +17,6 @@ struct mapped_device; /* - * One of these is allocated per request. - */ -struct dm_rq_target_io { - struct mapped_device *md; - struct dm_target *ti; - struct request *orig, *clone; - struct kthread_work work; - blk_status_t error; - union map_info info; - struct dm_stats_aux stats_aux; - unsigned long duration_jiffies; - unsigned n_sectors; - unsigned completed; -}; - -/* * For request-based dm - the bio clones we allocate are embedded in these * structs. * diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 36805b12661e..a168963b757d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2338,13 +2338,6 @@ static int origin_map(struct dm_target *ti, struct bio *bio) return do_origin(o->dev, bio); } -static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) -{ - DMWARN("device does not support dax."); - return -EIO; -} - /* * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. @@ -2404,7 +2397,6 @@ static struct target_type origin_target = { .postsuspend = origin_postsuspend, .status = origin_status, .iterate_devices = origin_iterate_devices, - .direct_access = origin_dax_direct_access, }; static struct target_type snapshot_target = { diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index fae35caf3672..8a0f057b8122 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -61,8 +61,7 @@ static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_pat { struct switch_ctx *sctx; - sctx = kzalloc(sizeof(struct switch_ctx) + nr_paths * sizeof(struct switch_path), - GFP_KERNEL); + sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL); if (!sctx) return NULL; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e83b63608262..fcd887703f95 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3283,6 +3283,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; + /* make sure metadata and data are different devices */ + if (!strcmp(argv[0], argv[1])) { + ti->error = "Error setting metadata or data device"; + r = -EINVAL; + goto out_unlock; + } + /* * Set default pool features. */ @@ -4167,6 +4174,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) tc->sort_bio_list = RB_ROOT; if (argc == 3) { + if (!strcmp(argv[0], argv[2])) { + ti->error = "Error setting origin device"; + r = -EINVAL; + goto bad_origin_dev; + } + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); if (r) { ti->error = "Error opening origin device"; @@ -4227,7 +4240,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; - ti->split_discard_bios = false; } mutex_unlock(&dm_thin_pool_table.mutex); diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0ce04e5b4afb..b634fa23f4c4 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -73,7 +73,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, *offset = (unsigned)(position - (block << v->data_dev_block_bits)); res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); - if (unlikely(IS_ERR(res))) { + if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, (unsigned long long)(v->fec->start + block), @@ -163,7 +163,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); - if (unlikely(IS_ERR(par))) + if (IS_ERR(par)) return PTR_ERR(par); } } @@ -253,7 +253,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, } bbuf = dm_bufio_read(bufio, block, &buf); - if (unlikely(IS_ERR(bbuf))) { + if (IS_ERR(bbuf)) { DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", v->data_dev->name, (unsigned long long)rsb, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 2b8cee35e4d5..f7822875589e 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1859,7 +1859,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - wc->writeback_wq = alloc_workqueue("writecache-writeabck", WQ_MEM_RECLAIM, 1); + wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); if (!wc->writeback_wq) { r = -ENOMEM; ti->error = "Could not allocate writeback workqueue"; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 6af5babe6837..8865c1709e16 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->per_io_data_size = sizeof(struct dmz_bioctx); ti->flush_supported = true; ti->discards_supported = true; - ti->split_discard_bios = true; /* The exposed capacity is the number of chunks that can be mapped */ ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 515e6af9bed2..68d24056d0b1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -158,9 +158,6 @@ struct table_device { struct dm_dev dm_dev; }; -static struct kmem_cache *_rq_tio_cache; -static struct kmem_cache *_rq_cache; - /* * Bio-based DM's mempools' reserved IOs set by the user. */ @@ -222,20 +219,11 @@ static unsigned dm_get_numa_node(void) static int __init local_init(void) { - int r = -ENOMEM; - - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); - if (!_rq_tio_cache) - return r; - - _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request), - __alignof__(struct request), 0, NULL); - if (!_rq_cache) - goto out_free_rq_tio_cache; + int r; r = dm_uevent_init(); if (r) - goto out_free_rq_cache; + return r; deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); if (!deferred_remove_workqueue) { @@ -257,10 +245,6 @@ out_free_workqueue: destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); -out_free_rq_cache: - kmem_cache_destroy(_rq_cache); -out_free_rq_tio_cache: - kmem_cache_destroy(_rq_tio_cache); return r; } @@ -270,8 +254,6 @@ static void local_exit(void) flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); - kmem_cache_destroy(_rq_cache); - kmem_cache_destroy(_rq_tio_cache); unregister_blkdev(_major, _name); dm_uevent_exit(); @@ -1478,17 +1460,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti) return ti->num_write_zeroes_bios; } -typedef bool (*is_split_required_fn)(struct dm_target *ti); - -static bool is_split_required_for_discard(struct dm_target *ti) -{ - return ti->split_discard_bios; -} - static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, bool is_split_required) + unsigned num_bios) { - unsigned len; + unsigned len = ci->sector_count; /* * Even though the device advertised support for this type of @@ -1499,11 +1474,6 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * if (!num_bios) return -EOPNOTSUPP; - if (!is_split_required) - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); - else - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; @@ -1514,23 +1484,38 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * static int __send_discard(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti), - is_split_required_for_discard(ti)); + return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti)); } static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti)); } static int __send_write_same(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti)); } static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti)); +} + +static bool is_abnormal_io(struct bio *bio) +{ + bool r = false; + + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: + r = true; + break; + } + + return r; } static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, @@ -1565,7 +1550,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - if (unlikely(__process_abnormal_io(ci, ti, &r))) + if (__process_abnormal_io(ci, ti, &r)) return r; len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); @@ -1601,13 +1586,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - - blk_queue_split(md->queue, &bio); - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1675,18 +1653,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * Optimized variant of __split_and_process_bio that leverages the * fact that targets that use it do _not_ have a need to split bios. */ -static blk_qc_t __process_bio(struct mapped_device *md, - struct dm_table *map, struct bio *bio) +static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, + struct bio *bio, struct dm_target *ti) { struct clone_info ci; blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1704,21 +1677,11 @@ static blk_qc_t __process_bio(struct mapped_device *md, error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { - struct dm_target *ti = md->immutable_target; struct dm_target_io *tio; - /* - * Defend against IO still getting in during teardown - * - as was seen for a time with nvme-fcloop - */ - if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) { - error = -EIO; - goto out; - } - ci.bio = bio; ci.sector_count = bio_sectors(bio); - if (unlikely(__process_abnormal_io(&ci, ti, &error))) + if (__process_abnormal_io(&ci, ti, &error)) goto out; tio = alloc_tio(&ci, ti, 0, GFP_NOIO); @@ -1730,11 +1693,55 @@ out: return ret; } +static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio) +{ + unsigned len, sector_count; + + sector_count = bio_sectors(*bio); + len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count); + + if (sector_count > len) { + struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split); + + bio_chain(split, *bio); + trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector); + generic_make_request(*bio); + *bio = split; + } +} + static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { + blk_qc_t ret = BLK_QC_T_NONE; + struct dm_target *ti = md->immutable_target; + + if (unlikely(!map)) { + bio_io_error(bio); + return ret; + } + + if (!ti) { + ti = dm_table_find_target(map, bio->bi_iter.bi_sector); + if (unlikely(!ti || !dm_target_is_valid(ti))) { + bio_io_error(bio); + return ret; + } + } + + /* + * If in ->make_request_fn we need to use blk_queue_split(), otherwise + * queue_limits for abnormal requests (e.g. discard, writesame, etc) + * won't be imposed. + */ + if (current->bio_list) { + blk_queue_split(md->queue, &bio); + if (!is_abnormal_io(bio)) + dm_queue_split(md, ti, &bio); + } + if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - return __process_bio(md, map, bio); + return __process_bio(md, map, bio, ti); else return __split_and_process_bio(md, map, bio); } diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 492a3f8ac119..3972232b8037 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, int r; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, int r; p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); if (unlikely(!p)) return -EWOULDBLOCK; @@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); memset(p, 0, dm_bm_block_size(bm)); |