diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-14 00:24:31 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-14 00:24:31 +0200 |
commit | 2260840592fbed5be98ca03c97eb8172941f27ac (patch) | |
tree | 58549ab8b811b0ae728506866c7f404ac1ea27ba /drivers | |
parent | Merge tag 'for-linus-5.3' of git://github.com/cminyard/linux-ipmi (diff) | |
parent | dm bufio: fix deadlock with loop device (diff) | |
download | linux-2260840592fbed5be98ca03c97eb8172941f27ac.tar.xz linux-2260840592fbed5be98ca03c97eb8172941f27ac.zip |
Merge tag 'for-5.3/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Add encrypted byte-offset initialization vector (eboiv) to DM crypt.
- Add optional discard features to DM snapshot which allow freeing
space from a DM device whose free space was exhausted.
- Various small improvements to use struct_size() and kzalloc().
- Fix to check if DM thin metadata is in fail_io mode before attempting
to update the superblock to set the needs_check flag. Otherwise the
DM thin-pool can hang.
- Fix DM bufio shrinker's potential for ABBA recursion deadlock with DM
thin provisioning on loop usecase.
* tag 'for-5.3/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm bufio: fix deadlock with loop device
dm snapshot: add optional discard support features
dm crypt: implement eboiv - encrypted byte-offset initialization vector
dm crypt: remove obsolete comment about plumb IV
dm crypt: wipe private IV struct after key invalid flag is set
dm integrity: use kzalloc() instead of kmalloc() + memset()
dm: update stale comment in end_clone_bio()
dm log writes: fix incorrect comment about the logged sequence example
dm log writes: use struct_size() to calculate size of pending_block
dm crypt: use struct_size() when allocating encryption context
dm integrity: always set version on superblock update
dm thin metadata: check if in fail_io mode when setting needs_check
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/dm-bufio.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 101 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 7 | ||||
-rw-r--r-- | drivers/md/dm-log-writes.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 186 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 7 |
7 files changed, 268 insertions, 43 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 2a48ea3f1b30..b6b5acc92ca2 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1599,9 +1599,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_FS) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) + if (!dm_bufio_trylock(c)) return SHRINK_STOP; freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1b16d34bb785..d5216bcc4649 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -120,6 +120,10 @@ struct iv_tcw_private { u8 *whitening; }; +struct iv_eboiv_private { + struct crypto_cipher *tfm; +}; + /* * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. @@ -159,6 +163,7 @@ struct crypt_config { struct iv_benbi_private benbi; struct iv_lmk_private lmk; struct iv_tcw_private tcw; + struct iv_eboiv_private eboiv; } iv_gen_private; u64 iv_offset; unsigned int iv_size; @@ -291,8 +296,9 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc) * Note that this encryption scheme is vulnerable to watermarking attacks * and should be used for old compatible containers access only. * - * plumb: unimplemented, see: - * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 + * eboiv: Encrypted byte-offset IV (used in Bitlocker in CBC mode) + * The IV is encrypted little-endian byte-offset (with the same key + * and cipher as the volume). */ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, @@ -841,6 +847,67 @@ static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv, return 0; } +static void crypt_iv_eboiv_dtr(struct crypt_config *cc) +{ + struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv; + + crypto_free_cipher(eboiv->tfm); + eboiv->tfm = NULL; +} + +static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) +{ + struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv; + struct crypto_cipher *tfm; + + tfm = crypto_alloc_cipher(cc->cipher, 0, 0); + if (IS_ERR(tfm)) { + ti->error = "Error allocating crypto tfm for EBOIV"; + return PTR_ERR(tfm); + } + + if (crypto_cipher_blocksize(tfm) != cc->iv_size) { + ti->error = "Block size of EBOIV cipher does " + "not match IV size of block cipher"; + crypto_free_cipher(tfm); + return -EINVAL; + } + + eboiv->tfm = tfm; + return 0; +} + +static int crypt_iv_eboiv_init(struct crypt_config *cc) +{ + struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv; + int err; + + err = crypto_cipher_setkey(eboiv->tfm, cc->key, cc->key_size); + if (err) + return err; + + return 0; +} + +static int crypt_iv_eboiv_wipe(struct crypt_config *cc) +{ + /* Called after cc->key is set to random key in crypt_wipe() */ + return crypt_iv_eboiv_init(cc); +} + +static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + struct iv_eboiv_private *eboiv = &cc->iv_gen_private.eboiv; + + memset(iv, 0, cc->iv_size); + *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector * cc->sector_size); + crypto_cipher_encrypt_one(eboiv->tfm, iv, iv); + + return 0; +} + static const struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; @@ -893,6 +960,14 @@ static struct crypt_iv_operations crypt_iv_random_ops = { .generator = crypt_iv_random_gen }; +static struct crypt_iv_operations crypt_iv_eboiv_ops = { + .ctr = crypt_iv_eboiv_ctr, + .dtr = crypt_iv_eboiv_dtr, + .init = crypt_iv_eboiv_init, + .wipe = crypt_iv_eboiv_wipe, + .generator = crypt_iv_eboiv_gen +}; + /* * Integrity extensions */ @@ -2158,6 +2233,14 @@ static int crypt_wipe_key(struct crypt_config *cc) clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); get_random_bytes(&cc->key, cc->key_size); + + /* Wipe IV private keys */ + if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { + r = cc->iv_gen_ops->wipe(cc); + if (r) + return r; + } + kzfree(cc->key_string); cc->key_string = NULL; r = crypt_setkey(cc); @@ -2288,6 +2371,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) cc->iv_gen_ops = &crypt_iv_benbi_ops; else if (strcmp(ivmode, "null") == 0) cc->iv_gen_ops = &crypt_iv_null_ops; + else if (strcmp(ivmode, "eboiv") == 0) + cc->iv_gen_ops = &crypt_iv_eboiv_ops; else if (strcmp(ivmode, "lmk") == 0) { cc->iv_gen_ops = &crypt_iv_lmk_ops; /* @@ -2699,7 +2784,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); + cc = kzalloc(struct_size(cc, key, key_size), GFP_KERNEL); if (!cc) { ti->error = "Cannot allocate encryption context"; return -ENOMEM; @@ -3050,14 +3135,8 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv, memset(cc->key, 0, cc->key_size * sizeof(u8)); return ret; } - if (argc == 2 && !strcasecmp(argv[1], "wipe")) { - if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { - ret = cc->iv_gen_ops->wipe(cc); - if (ret) - return ret; - } + if (argc == 2 && !strcasecmp(argv[1], "wipe")) return crypt_wipe_key(cc); - } } error: @@ -3094,7 +3173,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 18, 1}, + .version = {1, 19, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 44e76cda087a..b1b0de402dfc 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -476,6 +476,9 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) io_loc.sector = ic->start; io_loc.count = SB_SECTORS; + if (op == REQ_OP_WRITE) + sb_set_version(ic); + return dm_io(&io_req, 1, &io_loc, NULL); } @@ -2317,7 +2320,6 @@ static void recalc_write_super(struct dm_integrity_c *ic) if (dm_integrity_failed(ic)) return; - sb_set_version(ic); r = sync_rw_sb(ic, REQ_OP_WRITE, 0); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); @@ -3358,7 +3360,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - crypt_iv = kmalloc(ivsize, GFP_KERNEL); + crypt_iv = kzalloc(ivsize, GFP_KERNEL); if (!crypt_iv) { *error = "Could not allocate iv"; r = -ENOMEM; @@ -3387,7 +3389,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_set_buf(&sg[i], va, PAGE_SIZE); } sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); - memset(crypt_iv, 0x00, ivsize); skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index e549392e0ea5..99721c76225d 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -40,7 +40,7 @@ * * Would result in the log looking like this: * - * c,a,flush,fuad,b,<other writes>,<next flush> + * c,a,b,flush,fuad,<other writes>,<next flush> * * This is meant to help expose problems where file systems do not properly wait * on data being written before invoking a FLUSH. FUA bypasses cache so once it @@ -699,7 +699,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) if (discard_bio) alloc_size = sizeof(struct pending_block); else - alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio); + alloc_size = struct_size(block, vecs, bio_segments(bio)); block = kzalloc(alloc_size, GFP_NOIO); if (!block) { diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 5f7063f05ae0..c9e44ac1f9a6 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -115,7 +115,7 @@ static void end_clone_bio(struct bio *clone) /* * Update the original request. - * Do not use blk_end_request() here, because it may complete + * Do not use blk_mq_end_request() here, because it may complete * the original request before the clone, and break the ordering. */ if (is_last) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3107f2b1988b..63916e1dc569 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1,6 +1,4 @@ /* - * dm-snapshot.c - * * Copyright (C) 2001-2002 Sistina Software (UK) Limited. * * This file is released under the GPL. @@ -134,7 +132,10 @@ struct dm_snapshot { * - I/O error while merging * => stop merging; set merge_failed; process I/O normally. */ - int merge_failed; + bool merge_failed:1; + + bool discard_zeroes_cow:1; + bool discard_passdown_origin:1; /* * Incoming bios that overlap with chunks being merged must wait @@ -1173,12 +1174,64 @@ static void stop_merge(struct dm_snapshot *s) clear_bit(SHUTDOWN_MERGE, &s->state_bits); } +static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s, + struct dm_target *ti) +{ + int r; + unsigned argc; + const char *arg_name; + + static const struct dm_arg _args[] = { + {0, 2, "Invalid number of feature arguments"}, + }; + + /* + * No feature arguments supplied. + */ + if (!as->argc) + return 0; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + while (argc && !r) { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, "discard_zeroes_cow")) + s->discard_zeroes_cow = true; + + else if (!strcasecmp(arg_name, "discard_passdown_origin")) + s->discard_passdown_origin = true; + + else { + ti->error = "Unrecognised feature requested"; + r = -EINVAL; + break; + } + } + + if (!s->discard_zeroes_cow && s->discard_passdown_origin) { + /* + * TODO: really these are disjoint.. but ti->num_discard_bios + * and dm_bio_get_target_bio_nr() require rigid constraints. + */ + ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow"; + r = -EINVAL; + } + + return r; +} + /* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> + * Construct a snapshot mapping: + * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*] */ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + struct dm_arg_set as; int i; int r = -EINVAL; char *origin_path, *cow_path; @@ -1186,8 +1239,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; - if (argc != 4) { - ti->error = "requires exactly 4 arguments"; + if (argc < 4) { + ti->error = "requires 4 or more arguments"; r = -EINVAL; goto bad; } @@ -1204,6 +1257,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + as.argc = argc; + as.argv = argv; + dm_consume_args(&as, 4); + r = parse_snapshot_features(&as, s, ti); + if (r) + goto bad_features; + origin_path = argv[0]; argv++; argc--; @@ -1289,6 +1349,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_bios = num_flush_bios; + if (s->discard_zeroes_cow) + ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1); ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1336,29 +1398,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad_read_metadata: unregister_snapshot(s); - bad_load_and_register: mempool_exit(&s->pending_pool); - bad_pending_pool: dm_kcopyd_client_destroy(s->kcopyd_client); - bad_kcopyd: dm_exception_table_exit(&s->pending, pending_cache); dm_exception_table_exit(&s->complete, exception_cache); - bad_hash_tables: dm_exception_store_destroy(s->store); - bad_store: dm_put_device(ti, s->cow); - bad_cow: dm_put_device(ti, s->origin); - bad_origin: +bad_features: kfree(s); - bad: return r; } @@ -1806,6 +1861,37 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, (bio->bi_iter.bi_sector & s->store->chunk_mask); } +static void zero_callback(int read_err, unsigned long write_err, void *context) +{ + struct bio *bio = context; + struct dm_snapshot *s = bio->bi_private; + + up(&s->cow_count); + bio->bi_status = write_err ? BLK_STS_IOERR : 0; + bio_endio(bio); +} + +static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, + struct bio *bio, chunk_t chunk) +{ + struct dm_io_region dest; + + dest.bdev = s->cow->bdev; + dest.sector = bio->bi_iter.bi_sector; + dest.count = s->store->chunk_size; + + down(&s->cow_count); + WARN_ON_ONCE(bio->bi_private); + bio->bi_private = s; + dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); +} + +static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio) +{ + return bio->bi_iter.bi_size == + (s->store->chunk_size << SECTOR_SHIFT); +} + static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; @@ -1839,10 +1925,43 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) goto out_unlock; } + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) { + /* + * passdown discard to origin (without triggering + * snapshot exceptions via do_origin; doing so would + * defeat the goal of freeing space in origin that is + * implied by the "discard_passdown_origin" feature) + */ + bio_set_dev(bio, s->origin->bdev); + track_chunk(s, bio, chunk); + goto out_unlock; + } + /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */ + } + /* If the block is already remapped - use that, else remap it */ e = dm_lookup_exception(&s->complete, chunk); if (e) { remap_exception(s, e, bio, chunk); + if (unlikely(bio_op(bio) == REQ_OP_DISCARD) && + io_overlaps_chunk(s, bio)) { + dm_exception_table_unlock(&lock); + up_read(&s->lock); + zero_exception(s, e, bio, chunk); + r = DM_MAPIO_SUBMITTED; /* discard is not issued */ + goto out; + } + goto out_unlock; + } + + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + /* + * If no exception exists, complete discard immediately + * otherwise it'll trigger copy-out. + */ + bio_endio(bio); + r = DM_MAPIO_SUBMITTED; goto out_unlock; } @@ -1890,9 +2009,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) r = DM_MAPIO_SUBMITTED; - if (!pe->started && - bio->bi_iter.bi_size == - (s->store->chunk_size << SECTOR_SHIFT)) { + if (!pe->started && io_overlaps_chunk(s, bio)) { pe->started = 1; dm_exception_table_unlock(&lock); @@ -2138,6 +2255,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, { unsigned sz = 0; struct dm_snapshot *snap = ti->private; + unsigned num_features; switch (type) { case STATUSTYPE_INFO: @@ -2178,8 +2296,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, * make sense. */ DMEMIT("%s %s", snap->origin->name, snap->cow->name); - snap->store->type->status(snap->store, type, result + sz, - maxlen - sz); + sz += snap->store->type->status(snap->store, type, result + sz, + maxlen - sz); + num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin; + if (num_features) { + DMEMIT(" %u", num_features); + if (snap->discard_zeroes_cow) + DMEMIT(" discard_zeroes_cow"); + if (snap->discard_passdown_origin) + DMEMIT(" discard_passdown_origin"); + } break; } } @@ -2198,6 +2324,22 @@ static int snapshot_iterate_devices(struct dm_target *ti, return r; } +static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct dm_snapshot *snap = ti->private; + + if (snap->discard_zeroes_cow) { + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + + (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL); + if (snap_src && snap_dest) + snap = snap_src; + + /* All discards are split on chunk_size boundary */ + limits->discard_granularity = snap->store->chunk_size; + limits->max_discard_sectors = snap->store->chunk_size; + } +} /*----------------------------------------------------------------- * Origin methods @@ -2522,7 +2664,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2532,11 +2674,12 @@ static struct target_type snapshot_target = { .resume = snapshot_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2547,6 +2690,7 @@ static struct target_type merge_target = { .resume = snapshot_merge_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static int __init dm_snapshot_init(void) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 7f0840601737..4c68a7b93d5e 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -2046,16 +2046,19 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; struct dm_block *sblock; struct thin_disk_superblock *disk_super; pmd_write_lock(pmd); + if (pmd->fail_io) + goto out; + pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG; r = superblock_lock(pmd, &sblock); if (r) { - DMERR("couldn't read superblock"); + DMERR("couldn't lock superblock"); goto out; } |