author     Linus Torvalds <torvalds@linux-foundation.org>  2020-08-07 22:08:09 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-08-07 22:08:09 +0200
commit     2f12d44085dabf5fa5779ff0bb0aaa1b2cc768cb
tree       d0f2ae220bdd15d9bdfe713f3e613f55699593a3
parent     Merge tag 'media/v5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mch...
parent     dm: don't call report zones for more than the user requested
Merge tag 'for-5.9/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- DM multipath locking fixes around m->flags tests and improvements to
bio-based code so that it follows patterns established by
request-based code.
- Request-based DM core improvement to eliminate unnecessary call to
blk_mq_queue_stopped().
- Add "panic_on_corruption" error handling mode to DM verity target.
- DM bufio fix to perform buffer cleanup from a workqueue rather than
waiting for IO in reclaim context from the shrinker.
- DM crypt improvement to optionally avoid async processing via
workqueues for reads and/or writes -- via "no_read_workqueue" and
"no_write_workqueue" features. This more direct IO processing
improves latency and throughput with faster storage. Avoiding
workqueue IO submission for writes (DM_CRYPT_NO_WRITE_WORKQUEUE) is a
requirement for adding zoned block device support to DM crypt (a sample
table line using these flags follows the list below).
- Add zoned block device support to DM crypt. Makes use of
DM_CRYPT_NO_WRITE_WORKQUEUE and a new optional feature
(DM_CRYPT_WRITE_INLINE) that allows write completion to wait for
encryption to complete. This allows write ordering to be preserved,
which is needed for zoned block devices.
- Fix DM ebs target's check for REQ_OP_FLUSH.
- Fix DM core's report zones support to not report more zones than were
requested.
- A few small compiler warning fixes.
- DM dust improvements to return message output directly to the user
rather than requiring them to scrape the system log for it (a short usage
example follows below).
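
A minimal sketch of a verity table line using the new mode, assuming an
illustrative 1 GiB data device with 4096-byte data and hash blocks; the
device paths, sizes, root hash and salt are placeholders, not values taken
from this pull:

    # "1 panic_on_corruption" appends one optional argument, in the same way
    # restart_on_corruption is passed today
    dmsetup create vroot --table "0 2097152 verity 1 /dev/sda1 /dev/sda2 \
        4096 4096 262144 1 sha256 <root_hash> <salt> 1 panic_on_corruption"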
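
Similarly, a hedged sketch of a crypt table line enabling the new
workqueue-bypass flags; the cipher, key and device are placeholders, and
the "2" is the count of optional feature arguments:

    # bypass both the read and write kcryptd workqueues for this mapping
    dmsetup create cryptdev --table "0 8388608 crypt aes-xts-plain64 <hex_key> 0 \
        /dev/nvme0n1p2 0 2 no_read_workqueue no_write_workqueue"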
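
For DM dust, message results are now written to the ioctl result buffer, so
dmsetup prints them directly instead of relying on the kernel log; the count
below is the example value used in the dm-dust documentation:

    $ sudo dmsetup message dust1 0 countbadblocks
    countbadblocks: 895 badblock(s) found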
* tag 'for-5.9/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm: don't call report zones for more than the user requested
dm ebs: Fix incorrect checking for REQ_OP_FLUSH
dm init: Set file local variable static
dm ioctl: Fix compilation warning
dm raid: Remove empty if statement
dm verity: Fix compilation warning
dm crypt: Enable zoned block device support
dm crypt: add flags to optionally bypass kcryptd workqueues
dm bufio: do buffer cleanup from a workqueue
dm rq: don't call blk_mq_queue_stopped() in dm_stop_queue()
dm dust: add interface to list all badblocks
dm dust: report some message results directly back to user
dm verity: add "panic_on_corruption" error handling mode
dm mpath: use double checked locking in fast path
dm mpath: rename current_pgpath to pgpath in multipath_prepare_ioctl
dm mpath: rework __map_bio()
dm mpath: factor out multipath_queue_bio
dm mpath: push locking down to must_push_back_rq()
dm mpath: take m->lock spinlock when testing QUEUE_IF_NO_PATH
dm mpath: changes from initial m->flags locking audit
 Documentation/admin-guide/device-mapper/dm-dust.rst |  32
 Documentation/admin-guide/device-mapper/verity.rst  |   4
 drivers/md/dm-bufio.c                                |  60
 drivers/md/dm-crypt.c                                | 129
 drivers/md/dm-dust.c                                 |  58
 drivers/md/dm-ebs-target.c                           |   2
 drivers/md/dm-init.c                                 |   2
 drivers/md/dm-ioctl.c                                |   2
 drivers/md/dm-mpath.c                                | 146
 drivers/md/dm-raid.c                                 |   2
 drivers/md/dm-rq.c                                   |   3
 drivers/md/dm-verity-target.c                        |  13
 drivers/md/dm-verity-verify-sig.h                    |  14
 drivers/md/dm-verity.h                               |   3
 drivers/md/dm.c                                      |   3
 15 files changed, 355 insertions(+), 118 deletions(-)
diff --git a/Documentation/admin-guide/device-mapper/dm-dust.rst b/Documentation/admin-guide/device-mapper/dm-dust.rst index b6e7e7ead831..e35ec8cd2f88 100644 --- a/Documentation/admin-guide/device-mapper/dm-dust.rst +++ b/Documentation/admin-guide/device-mapper/dm-dust.rst @@ -69,10 +69,11 @@ Create the dm-dust device: $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096' Check the status of the read behavior ("bypass" indicates that all I/O -will be passed through to the underlying device):: +will be passed through to the underlying device; "verbose" indicates that +bad block additions, removals, and remaps will be verbosely logged):: $ sudo dmsetup status dust1 - 0 33552384 dust 252:17 bypass + 0 33552384 dust 252:17 bypass verbose $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct 128+0 records in @@ -164,7 +165,7 @@ following message command:: A message will print with the number of bad blocks currently configured on the device:: - kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found + countbadblocks: 895 badblock(s) found Querying for specific bad blocks -------------------------------- @@ -176,11 +177,11 @@ following message command:: The following message will print if the block is in the list:: - device-mapper: dust: queryblock: block 72 found in badblocklist + dust_query_block: block 72 found in badblocklist The following message will print if the block is not in the list:: - device-mapper: dust: queryblock: block 72 not found in badblocklist + dust_query_block: block 72 not found in badblocklist The "queryblock" message command will work in both the "enabled" and "disabled" modes, allowing the verification of whether a block @@ -198,12 +199,28 @@ following message command:: After clearing the bad block list, the following message will appear:: - kernel: device-mapper: dust: clearbadblocks: badblocks cleared + dust_clear_badblocks: badblocks cleared If there were no bad blocks to clear, the following message will appear:: - kernel: device-mapper: dust: clearbadblocks: no badblocks found + dust_clear_badblocks: no badblocks found + +Listing the bad block list +-------------------------- + +To list all bad blocks in the bad block list (using an example device +with blocks 1 and 2 in the bad block list), run the following message +command:: + + $ sudo dmsetup message dust1 0 listbadblocks + 1 + 2 + +If there are no bad blocks in the bad block list, the command will +execute with no output:: + + $ sudo dmsetup message dust1 0 listbadblocks Message commands list --------------------- @@ -223,6 +240,7 @@ Single argument message commands:: countbadblocks clearbadblocks + listbadblocks disable enable quiet diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index bb02caa45289..66f71f0dab1b 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -83,6 +83,10 @@ restart_on_corruption not compatible with ignore_corruption and requires user space support to avoid restart loops. +panic_on_corruption + Panic the device when a corrupted block is discovered. This option is + not compatible with ignore_corruption and restart_on_corruption. + ignore_zero_blocks Do not verify blocks that are expected to contain zeroes and always return zeroes instead. 
This may be useful if the partition contains unused blocks diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 6d1565021d74..9c1a86bde658 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -108,7 +108,10 @@ struct dm_bufio_client { int async_write_error; struct list_head client_list; + struct shrinker shrinker; + struct work_struct shrink_work; + atomic_long_t need_shrink; }; /* @@ -1634,8 +1637,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c) return retain_bytes; } -static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - gfp_t gfp_mask) +static void __scan(struct dm_bufio_client *c) { int l; struct dm_buffer *b, *tmp; @@ -1646,42 +1648,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { - if (__try_evict_buffer(b, gfp_mask)) + if (count - freed <= retain_target) + atomic_long_set(&c->need_shrink, 0); + if (!atomic_long_read(&c->need_shrink)) + return; + if (__try_evict_buffer(b, GFP_KERNEL)) { + atomic_long_dec(&c->need_shrink); freed++; - if (!--nr_to_scan || ((count - freed) <= retain_target)) - return freed; + } cond_resched(); } } - return freed; } -static unsigned long -dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +static void shrink_work(struct work_struct *w) +{ + struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work); + + dm_bufio_lock(c); + __scan(c); + dm_bufio_unlock(c); +} + +static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c; - unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_FS) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) - return SHRINK_STOP; + atomic_long_add(sc->nr_to_scan, &c->need_shrink); + queue_work(dm_bufio_wq, &c->shrink_work); - freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); - dm_bufio_unlock(c); - return freed; + return sc->nr_to_scan; } -static unsigned long -dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); unsigned long retain_target = get_retain_buffers(c); + unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); + + if (unlikely(count < retain_target)) + count = 0; + else + count -= retain_target; - return (count < retain_target) ? 
0 : (count - retain_target); + if (unlikely(count < queued_for_cleanup)) + count = 0; + else + count -= queued_for_cleanup; + + return count; } /* @@ -1772,6 +1790,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign __free_buffer_wake(b); } + INIT_WORK(&c->shrink_work, shrink_work); + atomic_long_set(&c->need_shrink, 0); + c->shrinker.count_objects = dm_bufio_shrink_count; c->shrinker.scan_objects = dm_bufio_shrink_scan; c->shrinker.seeks = 1; @@ -1817,6 +1838,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) drop_buffers(c); unregister_shrinker(&c->shrinker); + flush_work(&c->shrink_work); mutex_lock(&dm_bufio_clients_lock); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 37dcc52cf21d..148960721254 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -69,6 +69,7 @@ struct dm_crypt_io { u8 *integrity_metadata; bool integrity_metadata_from_pool; struct work_struct work; + struct tasklet_struct tasklet; struct convert_context ctx; @@ -127,7 +128,9 @@ struct iv_elephant_private { * and encrypts / decrypts at the same time. */ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, - DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, + DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE, + DM_CRYPT_WRITE_INLINE }; enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ @@ -1523,7 +1526,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) + struct convert_context *ctx, bool atomic) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; @@ -1566,7 +1569,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc, atomic_dec(&ctx->cc_pending); ctx->cc_sector += sector_step; tag_offset++; - cond_resched(); + if (!atomic) + cond_resched(); continue; /* * There was a data integrity error. @@ -1892,7 +1896,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) clone->bi_iter.bi_sector = cc->start + io->sector; - if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) { + if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) || + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) { submit_bio_noacct(clone); return; } @@ -1915,9 +1920,32 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) spin_unlock_irqrestore(&cc->write_thread_lock, flags); } +static bool kcryptd_crypt_write_inline(struct crypt_config *cc, + struct convert_context *ctx) + +{ + if (!test_bit(DM_CRYPT_WRITE_INLINE, &cc->flags)) + return false; + + /* + * Note: zone append writes (REQ_OP_ZONE_APPEND) do not have ordering + * constraints so they do not need to be issued inline by + * kcryptd_crypt_write_convert(). + */ + switch (bio_op(ctx->bio_in)) { + case REQ_OP_WRITE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: + return true; + default: + return false; + } +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; struct bio *clone; int crypt_finished; sector_t sector = io->sector; @@ -1927,7 +1955,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * Prevent io from disappearing until this function completes. 
*/ crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); + crypt_convert_init(cc, ctx, NULL, io->base_bio, sector); clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); if (unlikely(!clone)) { @@ -1941,10 +1969,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) sector += bio_sectors(clone); crypt_inc_pending(io); - r = crypt_convert(cc, &io->ctx); + r = crypt_convert(cc, ctx, + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); if (r) io->error = r; - crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } /* Encryption was already finished, submit io now */ if (crypt_finished) { @@ -1971,7 +2005,8 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, io->sector); - r = crypt_convert(cc, &io->ctx); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); if (r) io->error = r; @@ -2015,10 +2050,21 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (!atomic_dec_and_test(&ctx->cc_pending)) return; - if (bio_data_dir(io->base_bio) == READ) + /* + * The request is fully completed: for inline writes, let + * kcryptd_crypt_write_convert() do the IO submission. + */ + if (bio_data_dir(io->base_bio) == READ) { kcryptd_crypt_read_done(io); - else - kcryptd_crypt_write_io_submit(io, 1); + return; + } + + if (kcryptd_crypt_write_inline(cc, ctx)) { + complete(&ctx->restart); + return; + } + + kcryptd_crypt_write_io_submit(io, 1); } static void kcryptd_crypt(struct work_struct *work) @@ -2031,10 +2077,28 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } +static void kcryptd_crypt_tasklet(unsigned long work) +{ + kcryptd_crypt((struct work_struct *)work); +} + static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; + if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || + (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { + if (in_irq()) { + /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); + tasklet_schedule(&io->tasklet); + return; + } + + kcryptd_crypt(&io->work); + return; + } + INIT_WORK(&io->work, kcryptd_crypt); queue_work(cc->crypt_queue, &io->work); } @@ -2838,7 +2902,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar struct crypt_config *cc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 6, "Invalid number of feature args"}, + {0, 8, "Invalid number of feature args"}, }; unsigned int opt_params, val; const char *opt_string, *sval; @@ -2868,6 +2932,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + else if (!strcasecmp(opt_string, "no_read_workqueue")) + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + else if (!strcasecmp(opt_string, "no_write_workqueue")) + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); else if (sscanf(opt_string, "integrity:%u:", &val) == 1) { if (val == 0 || val > 
MAX_TAG_SIZE) { ti->error = "Invalid integrity arguments"; @@ -2908,6 +2976,21 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar return 0; } +#ifdef CONFIG_BLK_DEV_ZONED + +static int crypt_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) +{ + struct crypt_config *cc = ti->private; + sector_t sector = cc->start + dm_target_offset(ti, args->next_sector); + + args->start = cc->start; + return blkdev_report_zones(cc->dev->bdev, sector, nr_zones, + dm_report_zones_cb, args); +} + +#endif + /* * Construct an encryption mapping: * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start> @@ -3041,6 +3124,16 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; + /* + * For zoned block devices, we need to preserve the issuer write + * ordering. To do so, disable write workqueues and force inline + * encryption completion. + */ + if (bdev_is_zoned(cc->dev->bdev)) { + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_WRITE_INLINE, &cc->flags); + } + if (crypt_integrity_aead(cc) || cc->integrity_iv_size) { ret = crypt_integrity_ctr(cc, ti); if (ret) @@ -3196,6 +3289,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT); num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); if (cc->on_disk_tag_size) @@ -3208,6 +3303,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" same_cpu_crypt"); if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) DMEMIT(" submit_from_crypt_cpus"); + if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) + DMEMIT(" no_read_workqueue"); + if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) + DMEMIT(" no_write_workqueue"); if (cc->on_disk_tag_size) DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth); if (cc->sector_size != (1 << SECTOR_SHIFT)) @@ -3320,10 +3419,14 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 21, 0}, + .version = {1, 22, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, +#ifdef CONFIG_BLK_DEV_ZONED + .features = DM_TARGET_ZONED_HM, + .report_zones = crypt_report_zones, +#endif .map = crypt_map, .status = crypt_status, .postsuspend = crypt_postsuspend, diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index ff03b90072c5..072ea913cebc 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -138,20 +138,22 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block, return 0; } -static int dust_query_block(struct dust_device *dd, unsigned long long block) +static int dust_query_block(struct dust_device *dd, unsigned long long block, char *result, + unsigned int maxlen, unsigned int *sz_ptr) { struct badblock *bblock; unsigned long flags; + unsigned int sz = *sz_ptr; spin_lock_irqsave(&dd->dust_lock, flags); bblock = dust_rb_search(&dd->badblocklist, block); if (bblock != NULL) - DMINFO("%s: block %llu found in badblocklist", __func__, block); + DMEMIT("%s: block %llu found in badblocklist", 
__func__, block); else - DMINFO("%s: block %llu not found in badblocklist", __func__, block); + DMEMIT("%s: block %llu not found in badblocklist", __func__, block); spin_unlock_irqrestore(&dd->dust_lock, flags); - return 0; + return 1; } static int __dust_map_read(struct dust_device *dd, sector_t thisblock) @@ -259,11 +261,13 @@ static bool __dust_clear_badblocks(struct rb_root *tree, return true; } -static int dust_clear_badblocks(struct dust_device *dd) +static int dust_clear_badblocks(struct dust_device *dd, char *result, unsigned int maxlen, + unsigned int *sz_ptr) { unsigned long flags; struct rb_root badblocklist; unsigned long long badblock_count; + unsigned int sz = *sz_ptr; spin_lock_irqsave(&dd->dust_lock, flags); badblocklist = dd->badblocklist; @@ -273,11 +277,36 @@ static int dust_clear_badblocks(struct dust_device *dd) spin_unlock_irqrestore(&dd->dust_lock, flags); if (!__dust_clear_badblocks(&badblocklist, badblock_count)) - DMINFO("%s: no badblocks found", __func__); + DMEMIT("%s: no badblocks found", __func__); else - DMINFO("%s: badblocks cleared", __func__); + DMEMIT("%s: badblocks cleared", __func__); - return 0; + return 1; +} + +static int dust_list_badblocks(struct dust_device *dd, char *result, unsigned int maxlen, + unsigned int *sz_ptr) +{ + unsigned long flags; + struct rb_root badblocklist; + struct rb_node *node; + struct badblock *bblk; + unsigned int sz = *sz_ptr; + unsigned long long num = 0; + + spin_lock_irqsave(&dd->dust_lock, flags); + badblocklist = dd->badblocklist; + for (node = rb_first(&badblocklist); node; node = rb_next(node)) { + bblk = rb_entry(node, struct badblock, node); + DMEMIT("%llu\n", bblk->bb); + num++; + } + + spin_unlock_irqrestore(&dd->dust_lock, flags); + if (!num) + DMEMIT("No blocks in badblocklist"); + + return 1; } /* @@ -383,7 +412,7 @@ static void dust_dtr(struct dm_target *ti) } static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, - char *result_buf, unsigned int maxlen) + char *result, unsigned int maxlen) { struct dust_device *dd = ti->private; sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT; @@ -393,6 +422,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, unsigned char wr_fail_cnt; unsigned int tmp_ui; unsigned long flags; + unsigned int sz = 0; char dummy; if (argc == 1) { @@ -410,18 +440,20 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, r = 0; } else if (!strcasecmp(argv[0], "countbadblocks")) { spin_lock_irqsave(&dd->dust_lock, flags); - DMINFO("countbadblocks: %llu badblock(s) found", + DMEMIT("countbadblocks: %llu badblock(s) found", dd->badblock_count); spin_unlock_irqrestore(&dd->dust_lock, flags); - r = 0; + r = 1; } else if (!strcasecmp(argv[0], "clearbadblocks")) { - r = dust_clear_badblocks(dd); + r = dust_clear_badblocks(dd, result, maxlen, &sz); } else if (!strcasecmp(argv[0], "quiet")) { if (!dd->quiet_mode) dd->quiet_mode = true; else dd->quiet_mode = false; r = 0; + } else if (!strcasecmp(argv[0], "listbadblocks")) { + r = dust_list_badblocks(dd, result, maxlen, &sz); } else { invalid_msg = true; } @@ -441,7 +473,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, else if (!strcasecmp(argv[0], "removebadblock")) r = dust_remove_block(dd, block); else if (!strcasecmp(argv[0], "queryblock")) - r = dust_query_block(dd, block); + r = dust_query_block(dd, block, result, maxlen, &sz); else invalid_msg = true; diff --git a/drivers/md/dm-ebs-target.c 
b/drivers/md/dm-ebs-target.c index 44451276f128..cb85610527c2 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -363,7 +363,7 @@ static int ebs_map(struct dm_target *ti, struct bio *bio) bio_set_dev(bio, ec->dev->bdev); bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector); - if (unlikely(bio->bi_opf & REQ_OP_FLUSH)) + if (unlikely(bio_op(bio) == REQ_OP_FLUSH)) return DM_MAPIO_REMAPPED; /* * Only queue for bufio processing in case of partial or overlapping buffers diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b869316d3722..b0c45c6ebe0b 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char * const dm_allowed_targets[] __initconst = { +static const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear", diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 056d891a32a9..28122e850ea1 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1168,7 +1168,7 @@ static void retrieve_status(struct dm_table *table, spec->sector_start = ti->begin; spec->length = ti->len; strncpy(spec->target_type, ti->type->name, - sizeof(spec->target_type)); + sizeof(spec->target_type) - 1); outptr += sizeof(struct dm_target_spec); remaining = len - (outptr - outbuf); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 73bb23de6336..53645a6f474c 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -128,6 +128,20 @@ static void queue_if_no_path_timeout_work(struct timer_list *t); #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ +static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) +{ + bool r = test_bit(MPATHF_bit, &m->flags); + + if (r) { + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); + r = test_bit(MPATHF_bit, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); + } + + return r; +} + /*----------------------------------------------- * Allocation routines *-----------------------------------------------*/ @@ -335,6 +349,8 @@ static int pg_init_all_paths(struct multipath *m) static void __switch_pg(struct multipath *m, struct priority_group *pg) { + lockdep_assert_held(&m->lock); + m->current_pg = pg; /* Must we initialise the PG first, and queue I/O till it's ready? 
*/ @@ -382,7 +398,9 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { + spin_lock_irqsave(&m->lock, flags); clear_bit(MPATHF_QUEUE_IO, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); goto failed; } @@ -422,8 +440,11 @@ check_current_pg: continue; pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { - if (!bypassed) + if (!bypassed) { + spin_lock_irqsave(&m->lock, flags); set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); + } return pgpath; } } @@ -465,7 +486,14 @@ static bool __must_push_back(struct multipath *m) static bool must_push_back_rq(struct multipath *m) { - return test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m); + unsigned long flags; + bool ret; + + spin_lock_irqsave(&m->lock, flags); + ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m)); + spin_unlock_irqrestore(&m->lock, flags); + + return ret; } /* @@ -485,7 +513,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { @@ -493,8 +521,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return DM_MAPIO_DELAY_REQUEUE; dm_report_EIO(m); /* Failed */ return DM_MAPIO_KILL; - } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || - test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { + } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || + mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { pg_init_all_paths(m); return DM_MAPIO_DELAY_REQUEUE; } @@ -560,33 +588,45 @@ static void multipath_release_clone(struct request *clone, * Map cloned bios (bio-based multipath) */ +static void __multipath_queue_bio(struct multipath *m, struct bio *bio) +{ + /* Queue for the daemon to resubmit */ + bio_list_add(&m->queued_bios, bio); + if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) + queue_work(kmultipathd, &m->process_queued_bios); +} + +static void multipath_queue_bio(struct multipath *m, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + __multipath_queue_bio(m, bio); + spin_unlock_irqrestore(&m->lock, flags); +} + static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) { struct pgpath *pgpath; unsigned long flags; - bool queue_io; /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) pgpath = choose_pgpath(m, bio->bi_iter.bi_size); - /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. 
*/ - queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); - - if ((pgpath && queue_io) || - (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { - /* Queue for the daemon to resubmit */ + if (!pgpath) { spin_lock_irqsave(&m->lock, flags); - bio_list_add(&m->queued_bios, bio); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + __multipath_queue_bio(m, bio); + pgpath = ERR_PTR(-EAGAIN); + } spin_unlock_irqrestore(&m->lock, flags); - /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ - if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); - else if (!queue_io) - queue_work(kmultipathd, &m->process_queued_bios); - + } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || + mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { + multipath_queue_bio(m, bio); + pg_init_all_paths(m); return ERR_PTR(-EAGAIN); } @@ -835,7 +875,7 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, struct request_queue *q = bdev_get_queue(bdev); int r; - if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { + if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) { retain: if (*attached_handler_name) { /* @@ -1614,7 +1654,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (pgpath) fail_path(pgpath); - if (atomic_read(&m->nr_valid_paths) == 0 && + if (!atomic_read(&m->nr_valid_paths) && !must_push_back_rq(m)) { if (error == BLK_STS_IOERR) dm_report_EIO(m); @@ -1649,23 +1689,22 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, if (pgpath) fail_path(pgpath); - if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (__must_push_back(m)) { - r = DM_ENDIO_REQUEUE; - } else { - dm_report_EIO(m); - *error = BLK_STS_IOERR; + if (!atomic_read(&m->nr_valid_paths)) { + spin_lock_irqsave(&m->lock, flags); + if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + if (__must_push_back(m)) { + r = DM_ENDIO_REQUEUE; + } else { + dm_report_EIO(m); + *error = BLK_STS_IOERR; + } + spin_unlock_irqrestore(&m->lock, flags); + goto done; } - goto done; + spin_unlock_irqrestore(&m->lock, flags); } - spin_lock_irqsave(&m->lock, flags); - bio_list_add(&m->queued_bios, clone); - spin_unlock_irqrestore(&m->lock, flags); - if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) - queue_work(kmultipathd, &m->process_queued_bios); - + multipath_queue_bio(m, clone); r = DM_ENDIO_INCOMPLETE; done: if (pgpath) { @@ -1937,16 +1976,17 @@ static int multipath_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct multipath *m = ti->private; - struct pgpath *current_pgpath; + struct pgpath *pgpath; + unsigned long flags; int r; - current_pgpath = READ_ONCE(m->current_pgpath); - if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) - current_pgpath = choose_pgpath(m, 0); + pgpath = READ_ONCE(m->current_pgpath); + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) + pgpath = choose_pgpath(m, 0); - if (current_pgpath) { - if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) { - *bdev = current_pgpath->path.dev->bdev; + if (pgpath) { + if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) { + *bdev = pgpath->path.dev->bdev; r = 0; } else { /* pg_init has not started or completed */ @@ -1954,10 +1994,11 @@ static int multipath_prepare_ioctl(struct dm_target *ti, } } else { /* No path is available */ + r = -EIO; + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) r = -ENOTCONN; - else - r = -EIO; + 
spin_unlock_irqrestore(&m->lock, flags); } if (r == -ENOTCONN) { @@ -1965,8 +2006,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti, /* Path status changed, redo selection */ (void) choose_pgpath(m, 0); } + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); + (void) __pg_init_all_paths(m); + spin_unlock_irqrestore(&m->lock, flags); dm_table_run_md_queue_async(m->ti->table); process_queued_io_list(m); } @@ -2026,8 +2069,15 @@ static int multipath_busy(struct dm_target *ti) return true; /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ - if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return (m->queue_mode != DM_TYPE_REQUEST_BASED); + if (!atomic_read(&m->nr_valid_paths)) { + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + spin_unlock_irqrestore(&m->lock, flags); + return (m->queue_mode != DM_TYPE_REQUEST_BASED); + } + spin_unlock_irqrestore(&m->lock, flags); + } /* Guess which priority_group will be used at next mapping time */ pg = READ_ONCE(m->current_pg); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d9e270957e18..8d2b835d7a10 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2337,8 +2337,6 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) if (new_devs == rs->raid_disks || !rebuilds) { /* Replace a broken device */ - if (new_devs == 1 && !rs->delta_disks) - ; if (new_devs == rs->raid_disks) { DMINFO("Superblocks created for new raid set"); set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7ce387a1cc6a..6d743ff6a314 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -70,9 +70,6 @@ void dm_start_queue(struct request_queue *q) void dm_stop_queue(struct request_queue *q) { - if (blk_mq_queue_stopped(q)) - return; - blk_mq_quiesce_queue(q); } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 75fa4d9b7617..f74982dcbea0 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -30,6 +30,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPT_PANIC "panic_on_corruption" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" #define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" @@ -254,6 +255,9 @@ out: if (v->mode == DM_VERITY_MODE_RESTART) kernel_restart("dm-verity device corrupted"); + if (v->mode == DM_VERITY_MODE_PANIC) + panic("dm-verity device corrupted"); + return 1; } @@ -742,6 +746,9 @@ static void verity_status(struct dm_target *ti, status_type_t type, case DM_VERITY_MODE_RESTART: DMEMIT(DM_VERITY_OPT_RESTART); break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_PANIC); + break; default: BUG(); } @@ -907,6 +914,10 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, v->mode = DM_VERITY_MODE_RESTART; continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_PANIC)) { + v->mode = DM_VERITY_MODE_PANIC; + continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { r = verity_alloc_zero_digest(v); if (r) { @@ -1221,7 +1232,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity-verify-sig.h b/drivers/md/dm-verity-verify-sig.h index 
19b1547aa741..3987c7141f79 100644 --- a/drivers/md/dm-verity-verify-sig.h +++ b/drivers/md/dm-verity-verify-sig.h @@ -34,25 +34,25 @@ void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts); #define DM_VERITY_ROOT_HASH_VERIFICATION_OPTS 0 -int verity_verify_root_hash(const void *data, size_t data_len, - const void *sig_data, size_t sig_len) +static inline int verity_verify_root_hash(const void *data, size_t data_len, + const void *sig_data, size_t sig_len) { return 0; } -bool verity_verify_is_sig_opt_arg(const char *arg_name) +static inline bool verity_verify_is_sig_opt_arg(const char *arg_name) { return false; } -int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, - struct dm_verity_sig_opts *sig_opts, - unsigned int *argc, const char *arg_name) +static inline int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, struct dm_verity_sig_opts *sig_opts, + unsigned int *argc, const char *arg_name) { return -EINVAL; } -void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts) +static inline void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts) { } diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 641b9e3a399b..4e769d13473a 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -20,7 +20,8 @@ enum verity_mode { DM_VERITY_MODE_EIO, DM_VERITY_MODE_LOGGING, - DM_VERITY_MODE_RESTART + DM_VERITY_MODE_RESTART, + DM_VERITY_MODE_PANIC }; enum verity_block_type { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 87cf45f619fd..32fa6499739f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -504,7 +504,8 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, } args.tgt = tgt; - ret = tgt->type->report_zones(tgt, &args, nr_zones); + ret = tgt->type->report_zones(tgt, &args, + nr_zones - args.zone_idx); if (ret < 0) goto out; } while (args.zone_idx < nr_zones && |