Diffstat (limited to 'drivers/md')
48 files changed, 1567 insertions, 251 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 0602e82a9516..f45fb372e51b 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -15,6 +15,7 @@ if MD
 
 config BLK_DEV_MD
 	tristate "RAID support"
+	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	help
 	  This driver lets you combine several hard disk partitions into one
 	  logical block device. This can be used to simply append one
@@ -201,6 +202,7 @@ config BLK_DEV_DM_BUILTIN
 
 config BLK_DEV_DM
 	tristate "Device mapper support"
+	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select BLK_DEV_DM_BUILTIN
 	depends on DAX || DAX=n
 	help
@@ -340,7 +342,7 @@ config DM_WRITECACHE
 
 config DM_EBS
 	tristate "Emulated block size target (EXPERIMENTAL)"
-	depends on BLK_DEV_DM
+	depends on BLK_DEV_DM && !HIGHMEM
 	select DM_BUFIO
 	help
 	  dm-ebs emulates smaller logical block size on backing devices
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index a74aaf8b1445..816945eeed7f 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -96,6 +96,10 @@ ifeq ($(CONFIG_BLK_DEV_ZONED),y)
 dm-mod-objs		+= dm-zone.o
 endif
 
+ifeq ($(CONFIG_IMA),y)
+dm-mod-objs		+= dm-ima.o
+endif
+
 ifeq ($(CONFIG_DM_VERITY_FEC),y)
 dm-verity-objs		+= dm-verity-fec.o
 endif
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index d1ca4d059c20..cf3e8096942a 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -2,6 +2,7 @@
 
 config BCACHE
 	tristate "Block device as cache"
+	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select CRC64
 	help
 	  Allows a block device to be used as cache for other devices; uses
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 183a58c89377..0595559de174 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b)
 		struct bvec_iter_all iter_all;
 
 		bio_for_each_segment_all(bv, b->bio, iter_all) {
-			memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
+			memcpy(bvec_virt(bv), addr, PAGE_SIZE);
 			addr += PAGE_SIZE;
 		}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 185246a0d855..f2874c77ff79 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -885,11 +885,6 @@ static void bcache_device_free(struct bcache_device *d)
 	bcache_device_detach(d);
 
 	if (disk) {
-		bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
-
-		if (disk_added)
-			del_gendisk(disk);
-
 		blk_cleanup_disk(disk);
 		ida_simple_remove(&bcache_device_idx,
 				  first_minor_to_idx(disk->first_minor));
@@ -931,20 +926,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 	n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
 	d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
 	if (!d->full_dirty_stripes)
-		return -ENOMEM;
+		goto out_free_stripe_sectors_dirty;
 
 	idx = ida_simple_get(&bcache_device_idx, 0,
 				BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
 	if (idx < 0)
-		return idx;
+		goto out_free_full_dirty_stripes;
 
 	if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
 			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
-		goto err;
+		goto out_ida_remove;
 
 	d->disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!d->disk)
-		goto err;
+		goto out_bioset_exit;
 
 	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -987,8 +982,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
 	return 0;
 
-err:
+out_bioset_exit:
+	bioset_exit(&d->bio_split);
+out_ida_remove:
 	ida_simple_remove(&bcache_device_idx, idx);
+out_free_full_dirty_stripes:
+	kvfree(d->full_dirty_stripes);
+out_free_stripe_sectors_dirty:
+	kvfree(d->stripe_sectors_dirty);
 	return -ENOMEM;
 }
 
@@ -1365,8 +1366,10 @@ static void cached_dev_free(struct closure *cl)
 
 	mutex_lock(&bch_register_lock);
 
-	if (atomic_read(&dc->running))
+	if (atomic_read(&dc->running)) {
 		bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+		del_gendisk(dc->disk.disk);
+	}
 	bcache_device_free(&dc->disk);
 	list_del(&dc->list);
 
@@ -1512,6 +1515,7 @@ static void flash_dev_free(struct closure *cl)
 	mutex_lock(&bch_register_lock);
 	atomic_long_sub(bcache_dev_sectors_dirty(d),
 			&d->c->flash_dev_dirty_sectors);
+	del_gendisk(d->disk);
 	bcache_device_free(d);
 	mutex_unlock(&bch_register_lock);
 	kobject_put(&d->kobj);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index bca4a7c97da7..b64460a76267 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
 
 #include "closure.h"
 
-#define PAGE_SECTORS		(PAGE_SIZE / 512)
-
 struct closure;
 
 #ifdef CONFIG_BCACHE_DEBUG
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 8e4ced5a2516..bdd500447dea 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3122,6 +3122,30 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" %s", cache->ctr_args[i]);
 		if (cache->nr_ctr_args)
 			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
+		break;
+
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		if (get_cache_mode(cache) == CM_FAIL)
+			DMEMIT(",metadata_mode=fail");
+		else if (get_cache_mode(cache) == CM_READ_ONLY)
+			DMEMIT(",metadata_mode=ro");
+		else
+			DMEMIT(",metadata_mode=rw");
+
+		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
+		DMEMIT(",cache_metadata_device=%s", buf);
+		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
+		DMEMIT(",cache_device=%s", buf);
+		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
+		DMEMIT(",cache_origin_device=%s", buf);
+		DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
+		DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
+		DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
+		DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
+		DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ?
+		       'n' : 'y');
+		DMEMIT(";");
+		break;
 	}
 
 	return;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index a90bdf9b2ca6..84dbe08ad205 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1499,6 +1499,11 @@ static void clone_status(struct dm_target *ti, status_type_t type,
 
 		for (i = 0; i < clone->nr_ctr_args; i++)
 			DMEMIT(" %s", clone->ctr_args[i]);
+		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 
 	return;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index edc1553c4eea..55dccdfbcb22 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -18,6 +18,7 @@
 #include <trace/events/block.h>
 
 #include "dm.h"
+#include "dm-ima.h"
 
 #define DM_RESERVED_MAX_IOS		1024
 
@@ -119,6 +120,10 @@ struct mapped_device {
 	unsigned int nr_zones;
 	unsigned int *zwp_offset;
 #endif
+
+#ifdef CONFIG_IMA
+	struct dm_ima_measurements ima;
+#endif
 };
 
 /*
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 50f4cbd600d5..916b7da16de2 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2223,11 +2223,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 	if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
 	    (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
 		/*
-		 * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
+		 * in_hardirq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
 		 * irqs_disabled(): the kernel may run some IO completion from the idle thread, but
 		 * it is being executed with irqs disabled.
 		 */
-		if (in_irq() || irqs_disabled()) {
+		if (in_hardirq() || irqs_disabled()) {
 			tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
 			tasklet_schedule(&io->tasklet);
 			return;
@@ -2661,7 +2661,12 @@ static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
 	struct crypt_config *cc = pool_data;
 	struct page *page;
 
-	if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
+	/*
+	 * Note, percpu_counter_read_positive() may over (and under) estimate
+	 * the current usage by at most (batch - 1) * num_online_cpus() pages,
+	 * but avoids potential spinlock contention of an exact result.
+	 */
+	if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) >= dm_crypt_pages_per_client) &&
 	    likely(gfp_mask & __GFP_NORETRY))
 		return NULL;
 
@@ -3485,7 +3490,34 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 			if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
 				DMEMIT(" iv_large_sectors");
 		}
+		break;
 
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
+		DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
+		DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
+		       'y' : 'n');
+		DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
+		       'y' : 'n');
+		DMEMIT(",no_write_workqueue=%c", test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags) ?
+		       'y' : 'n');
+		DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ?
+		       'y' : 'n');
+
+		if (cc->on_disk_tag_size)
+			DMEMIT(",integrity_tag_size=%u,cipher_auth=%s",
+			       cc->on_disk_tag_size, cc->cipher_auth);
+		if (cc->sector_size != (1 << SECTOR_SHIFT))
+			DMEMIT(",sector_size=%d", cc->sector_size);
+		if (cc->cipher_string)
+			DMEMIT(",cipher_string=%s", cc->cipher_string);
+
+		DMEMIT(",key_size=%u", cc->key_size);
+		DMEMIT(",key_parts=%u", cc->key_parts);
+		DMEMIT(",key_extra_size=%u", cc->key_extra_size);
+		DMEMIT(",key_mac_size=%u", cc->key_mac_size);
+		DMEMIT(";");
 		break;
 	}
 }
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2628a832787b..59e51d285b0e 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -326,6 +326,10 @@ static void delay_status(struct dm_target *ti, status_type_t type,
 			DMEMIT_DELAY_CLASS(&dc->flush);
 		}
 		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 }
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index cbe1058ee589..3163e2b1418e 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -527,6 +527,10 @@ static void dust_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("%s %llu %u", dd->dev->name,
 		       (unsigned long long)dd->start, dd->blksz);
 		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 }
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 71475a2410be..d25989660a76 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -74,7 +74,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
 	if (unlikely(!bv->bv_page || !bv_len))
 		return -EIO;
 
-	pa = page_address(bv->bv_page) + bv->bv_offset;
+	pa = bvec_virt(bv);
 
 	/* Handle overlapping page <-> blocks */
 	while (bv_len) {
@@ -401,6 +401,9 @@ static void ebs_status(struct dm_target *ti, status_type_t type,
 		snprintf(result, maxlen, ec->u_bs_set ?
"%s %llu %u %u" : "%s %llu %u", ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 3b748393fca5..2a78f6874143 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1644,6 +1644,10 @@ static void era_status(struct dm_target *ti, status_type_t type, format_dev_t(buf, era->origin_dev->bdev->bd_dev); DMEMIT("%s %u", buf, era->sectors_per_block); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 5877220c01ed..4b94ffe6f2d4 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -440,6 +440,10 @@ static void flakey_status(struct dm_target *ti, status_type_t type, fc->corrupt_bio_value, fc->corrupt_bio_flags); break; + + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c new file mode 100644 index 000000000000..2c5edfbd7711 --- /dev/null +++ b/drivers/md/dm-ima.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Microsoft Corporation + * + * Author: Tushar Sugandhi <tusharsu@linux.microsoft.com> + * + * File: dm-ima.c + * Enables IMA measurements for DM targets + */ + +#include "dm-core.h" +#include "dm-ima.h" + +#include <linux/ima.h> +#include <crypto/hash.h> +#include <linux/crypto.h> +#include <crypto/hash_info.h> + +#define DM_MSG_PREFIX "ima" + +/* + * Internal function to prefix separator characters in input buffer with escape + * character, so that they don't interfere with the construction of key-value pairs, + * and clients can split the key1=val1,key2=val2,key3=val3; pairs properly. + */ +static void fix_separator_chars(char **buf) +{ + int l = strlen(*buf); + int i, j, sp = 0; + + for (i = 0; i < l; i++) + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + sp++; + + if (!sp) + return; + + for (i = l-1, j = i+sp; i >= 0; i--) { + (*buf)[j--] = (*buf)[i]; + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + (*buf)[j--] = '\\'; + } +} + +/* + * Internal function to allocate memory for IMA measurements. + */ +static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio) +{ + unsigned int noio_flag; + void *ptr; + + if (noio) + noio_flag = memalloc_noio_save(); + + ptr = kzalloc(len, flags); + + if (noio) + memalloc_noio_restore(noio_flag); + + return ptr; +} + +/* + * Internal function to allocate and copy name and uuid for IMA measurements. + */ +static int dm_ima_alloc_and_copy_name_uuid(struct mapped_device *md, char **dev_name, + char **dev_uuid, bool noio) +{ + int r; + *dev_name = dm_ima_alloc(DM_NAME_LEN*2, GFP_KERNEL, noio); + if (!(*dev_name)) { + r = -ENOMEM; + goto error; + } + + *dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, GFP_KERNEL, noio); + if (!(*dev_uuid)) { + r = -ENOMEM; + goto error; + } + + r = dm_copy_name_and_uuid(md, *dev_name, *dev_uuid); + if (r) + goto error; + + fix_separator_chars(dev_name); + fix_separator_chars(dev_uuid); + + return 0; +error: + kfree(*dev_name); + kfree(*dev_uuid); + *dev_name = NULL; + *dev_uuid = NULL; + return r; +} + +/* + * Internal function to allocate and copy device data for IMA measurements. 
+ */
+static int dm_ima_alloc_and_copy_device_data(struct mapped_device *md, char **device_data,
+					     unsigned int num_targets, bool noio)
+{
+	char *dev_name = NULL, *dev_uuid = NULL;
+	int r;
+
+	r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio);
+	if (r)
+		return r;
+
+	*device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+	if (!(*device_data)) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	scnprintf(*device_data, DM_IMA_DEVICE_BUF_LEN,
+		  "name=%s,uuid=%s,major=%d,minor=%d,minor_count=%d,num_targets=%u;",
+		  dev_name, dev_uuid, md->disk->major, md->disk->first_minor,
+		  md->disk->minors, num_targets);
+error:
+	kfree(dev_name);
+	kfree(dev_uuid);
+	return r;
+}
+
+/*
+ * Internal wrapper function to call IMA to measure DM data.
+ */
+static void dm_ima_measure_data(const char *event_name, const void *buf, size_t buf_len,
+				bool noio)
+{
+	unsigned int noio_flag;
+
+	if (noio)
+		noio_flag = memalloc_noio_save();
+
+	ima_measure_critical_data(DM_NAME, event_name, buf, buf_len,
+				  false, NULL, 0);
+
+	if (noio)
+		memalloc_noio_restore(noio_flag);
+}
+
+/*
+ * Internal function to allocate and copy current device capacity for IMA measurements.
+ */
+static int dm_ima_alloc_and_copy_capacity_str(struct mapped_device *md, char **capacity_str,
+					      bool noio)
+{
+	sector_t capacity;
+
+	capacity = get_capacity(md->disk);
+
+	*capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, GFP_KERNEL, noio);
+	if (!(*capacity_str))
+		return -ENOMEM;
+
+	scnprintf(*capacity_str, DM_IMA_DEVICE_BUF_LEN, "current_device_capacity=%llu;",
+		  capacity);
+
+	return 0;
+}
+
+/*
+ * Initialize/reset the dm ima related data structure variables.
+ */
+void dm_ima_reset_data(struct mapped_device *md)
+{
+	memset(&(md->ima), 0, sizeof(md->ima));
+	md->ima.dm_version_str_len = strlen(DM_IMA_VERSION_STR);
+}
+
+/*
+ * Build up the IMA data for each target, and finally measure.
+ */
+void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags)
+{
+	size_t device_data_buf_len, target_metadata_buf_len, target_data_buf_len, l = 0;
+	char *target_metadata_buf = NULL, *target_data_buf = NULL, *digest_buf = NULL;
+	char *ima_buf = NULL, *device_data_buf = NULL;
+	int digest_size, last_target_measured = -1, r;
+	status_type_t type = STATUSTYPE_IMA;
+	size_t cur_total_buf_len = 0;
+	unsigned int num_targets, i;
+	SHASH_DESC_ON_STACK(shash, NULL);
+	struct crypto_shash *tfm = NULL;
+	u8 *digest = NULL;
+	bool noio = false;
+	/*
+	 * In below hash_alg_prefix_len assignment +1 is for the additional char (':'),
+	 * when prefixing the hash value with the hash algorithm name. e.g. sha256:<hash_value>.
+	 */
+	const size_t hash_alg_prefix_len = strlen(DM_IMA_TABLE_HASH_ALG) + 1;
+	char table_load_event_name[] = "dm_table_load";
+
+	ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, GFP_KERNEL, noio);
+	if (!ima_buf)
+		return;
+
+	target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, GFP_KERNEL, noio);
+	if (!target_metadata_buf)
+		goto error;
+
+	target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, GFP_KERNEL, noio);
+	if (!target_data_buf)
+		goto error;
+
+	num_targets = dm_table_get_num_targets(table);
+
+	if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio))
+		goto error;
+
+	tfm = crypto_alloc_shash(DM_IMA_TABLE_HASH_ALG, 0, 0);
+	if (IS_ERR(tfm))
+		goto error;
+
+	shash->tfm = tfm;
+	digest_size = crypto_shash_digestsize(tfm);
+	digest = dm_ima_alloc(digest_size, GFP_KERNEL, noio);
+	if (!digest)
+		goto error;
+
+	r = crypto_shash_init(shash);
+	if (r)
+		goto error;
+
+	memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len);
+	l += table->md->ima.dm_version_str_len;
+
+	device_data_buf_len = strlen(device_data_buf);
+	memcpy(ima_buf + l, device_data_buf, device_data_buf_len);
+	l += device_data_buf_len;
+
+	for (i = 0; i < num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(table, i);
+
+		if (!ti)
+			goto error;
+
+		last_target_measured = 0;
+
+		/*
+		 * First retrieve the target metadata.
+		 */
+		scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN,
+			  "target_index=%d,target_begin=%llu,target_len=%llu,",
+			  i, ti->begin, ti->len);
+		target_metadata_buf_len = strlen(target_metadata_buf);
+
+		/*
+		 * Then retrieve the actual target data.
+		 */
+		if (ti->type->status)
+			ti->type->status(ti, type, status_flags, target_data_buf,
+					 DM_IMA_TARGET_DATA_BUF_LEN);
+		else
+			target_data_buf[0] = '\0';
+
+		target_data_buf_len = strlen(target_data_buf);
+
+		/*
+		 * Check if the total data can fit into the IMA buffer.
+		 */
+		cur_total_buf_len = l + target_metadata_buf_len + target_data_buf_len;
+
+		/*
+		 * IMA measurements for DM targets are best-effort.
+		 * If the total data buffered so far, including the current target,
+		 * is too large to fit into DM_IMA_MEASUREMENT_BUF_LEN, measure what
+		 * we have in the current buffer, and continue measuring the remaining
+		 * targets by prefixing the device metadata again.
+		 */
+		if (unlikely(cur_total_buf_len >= DM_IMA_MEASUREMENT_BUF_LEN)) {
+			dm_ima_measure_data(table_load_event_name, ima_buf, l, noio);
+
+			r = crypto_shash_update(shash, (const u8 *)ima_buf, l);
+			if (r < 0)
+				goto error;
+
+			memset(ima_buf, 0, DM_IMA_MEASUREMENT_BUF_LEN);
+			l = 0;
+
+			/*
+			 * Each new "dm_table_load" entry in IMA log should have device data
+			 * prefix, so that multiple records from the same "dm_table_load" for
+			 * a given device can be linked together.
+			 */
+			memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len);
+			l += table->md->ima.dm_version_str_len;
+
+			memcpy(ima_buf + l, device_data_buf, device_data_buf_len);
+			l += device_data_buf_len;
+
+			/*
+			 * If this iteration of the for loop turns out to be the last target
+			 * in the table, dm_ima_measure_data("dm_table_load", ...) doesn't need
+			 * to be called again, just the hash needs to be finalized.
+			 * "last_target_measured" tracks this state.
+			 */
+			last_target_measured = 1;
+		}
+
+		/*
+		 * Fill-in all the target metadata, so that multiple targets for the same
+		 * device can be linked together.
+		 */
+		memcpy(ima_buf + l, target_metadata_buf, target_metadata_buf_len);
+		l += target_metadata_buf_len;
+
+		memcpy(ima_buf + l, target_data_buf, target_data_buf_len);
+		l += target_data_buf_len;
+	}
+
+	if (!last_target_measured) {
+		dm_ima_measure_data(table_load_event_name, ima_buf, l, noio);
+
+		r = crypto_shash_update(shash, (const u8 *)ima_buf, l);
+		if (r < 0)
+			goto error;
+	}
+
+	/*
+	 * Finalize the table hash, and store it in table->md->ima.inactive_table.hash,
+	 * so that the table data can be verified against the future device state change
+	 * events, e.g. resume, rename, remove, table-clear etc.
+	 */
+	r = crypto_shash_final(shash, digest);
+	if (r < 0)
+		goto error;
+
+	digest_buf = dm_ima_alloc((digest_size*2) + hash_alg_prefix_len + 1, GFP_KERNEL, noio);
+
+	if (!digest_buf)
+		goto error;
+
+	snprintf(digest_buf, hash_alg_prefix_len + 1, "%s:", DM_IMA_TABLE_HASH_ALG);
+
+	for (i = 0; i < digest_size; i++)
+		snprintf((digest_buf + hash_alg_prefix_len + (i*2)), 3, "%02x", digest[i]);
+
+	if (table->md->ima.active_table.hash != table->md->ima.inactive_table.hash)
+		kfree(table->md->ima.inactive_table.hash);
+
+	table->md->ima.inactive_table.hash = digest_buf;
+	table->md->ima.inactive_table.hash_len = strlen(digest_buf);
+	table->md->ima.inactive_table.num_targets = num_targets;
+
+	if (table->md->ima.active_table.device_metadata !=
+	    table->md->ima.inactive_table.device_metadata)
+		kfree(table->md->ima.inactive_table.device_metadata);
+
+	table->md->ima.inactive_table.device_metadata = device_data_buf;
+	table->md->ima.inactive_table.device_metadata_len = device_data_buf_len;
+
+	goto exit;
+error:
+	kfree(digest_buf);
+	kfree(device_data_buf);
+exit:
+	kfree(digest);
+	if (tfm)
+		crypto_free_shash(tfm);
+	kfree(ima_buf);
+	kfree(target_metadata_buf);
+	kfree(target_data_buf);
+}
+
+/*
+ * Measure IMA data on device resume.
+ */
+void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
+{
+	char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
+	char active[] = "active_table_hash=";
+	unsigned int active_len = strlen(active), capacity_len = 0;
+	unsigned int l = 0;
+	bool noio = true;
+	bool nodata = true;
+	int r;
+
+	device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio);
+	if (!device_table_data)
+		return;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r)
+		goto error;
+
+	memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
+	l += md->ima.dm_version_str_len;
+
+	if (swap) {
+		if (md->ima.active_table.hash != md->ima.inactive_table.hash)
+			kfree(md->ima.active_table.hash);
+
+		md->ima.active_table.hash = NULL;
+		md->ima.active_table.hash_len = 0;
+
+		if (md->ima.active_table.device_metadata !=
+		    md->ima.inactive_table.device_metadata)
+			kfree(md->ima.active_table.device_metadata);
+
+		md->ima.active_table.device_metadata = NULL;
+		md->ima.active_table.device_metadata_len = 0;
+		md->ima.active_table.num_targets = 0;
+
+		if (md->ima.inactive_table.hash) {
+			md->ima.active_table.hash = md->ima.inactive_table.hash;
+			md->ima.active_table.hash_len = md->ima.inactive_table.hash_len;
+			md->ima.inactive_table.hash = NULL;
+			md->ima.inactive_table.hash_len = 0;
+		}
+
+		if (md->ima.inactive_table.device_metadata) {
+			md->ima.active_table.device_metadata =
+				md->ima.inactive_table.device_metadata;
+			md->ima.active_table.device_metadata_len =
+				md->ima.inactive_table.device_metadata_len;
+			md->ima.active_table.num_targets = md->ima.inactive_table.num_targets;
+			md->ima.inactive_table.device_metadata = NULL;
+			md->ima.inactive_table.device_metadata_len = 0;
+			md->ima.inactive_table.num_targets = 0;
+		}
+	}
+
+	if (md->ima.active_table.device_metadata) {
+		memcpy(device_table_data + l, md->ima.active_table.device_metadata,
+		       md->ima.active_table.device_metadata_len);
+		l += md->ima.active_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.active_table.hash) {
+		memcpy(device_table_data + l, active, active_len);
+		l += active_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.hash,
+		       md->ima.active_table.hash_len);
+		l += md->ima.active_table.hash_len;
+
+		memcpy(device_table_data + l, ";", 1);
+		l++;
+
+		nodata = false;
+	}
+
+	if (nodata) {
+		r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio);
+		if (r)
+			goto error;
+
+		scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+			  "%sname=%s,uuid=%s;device_resume=no_data;",
+			  DM_IMA_VERSION_STR, dev_name, dev_uuid);
+		l += strlen(device_table_data);
+
+	}
+
+	capacity_len = strlen(capacity_str);
+	memcpy(device_table_data + l, capacity_str, capacity_len);
+	l += capacity_len;
+
+	dm_ima_measure_data("dm_device_resume", device_table_data, l, noio);
+
+	kfree(dev_name);
+	kfree(dev_uuid);
+error:
+	kfree(capacity_str);
+	kfree(device_table_data);
+}
+
+/*
+ * Measure IMA data on remove.
+ */
+void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
+{
+	char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL;
+	char active_table_str[] = "active_table_hash=";
+	char inactive_table_str[] = "inactive_table_hash=";
+	char device_active_str[] = "device_active_metadata=";
+	char device_inactive_str[] = "device_inactive_metadata=";
+	char remove_all_str[] = "remove_all=";
+	unsigned int active_table_len = strlen(active_table_str);
+	unsigned int inactive_table_len = strlen(inactive_table_str);
+	unsigned int device_active_len = strlen(device_active_str);
+	unsigned int device_inactive_len = strlen(device_inactive_str);
+	unsigned int remove_all_len = strlen(remove_all_str);
+	unsigned int capacity_len = 0;
+	unsigned int l = 0;
+	bool noio = true;
+	bool nodata = true;
+	int r;
+
+	device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, GFP_KERNEL, noio);
+	if (!device_table_data)
+		goto exit;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r) {
+		kfree(device_table_data);
+		goto exit;
+	}
+
+	memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len);
+	l += md->ima.dm_version_str_len;
+
+	if (md->ima.active_table.device_metadata) {
+		memcpy(device_table_data + l, device_active_str, device_active_len);
+		l += device_active_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.device_metadata,
+		       md->ima.active_table.device_metadata_len);
+		l += md->ima.active_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.inactive_table.device_metadata) {
+		memcpy(device_table_data + l, device_inactive_str, device_inactive_len);
+		l += device_inactive_len;
+
+		memcpy(device_table_data + l, md->ima.inactive_table.device_metadata,
+		       md->ima.inactive_table.device_metadata_len);
+		l += md->ima.inactive_table.device_metadata_len;
+
+		nodata = false;
+	}
+
+	if (md->ima.active_table.hash) {
+		memcpy(device_table_data + l, active_table_str, active_table_len);
+		l += active_table_len;
+
+		memcpy(device_table_data + l, md->ima.active_table.hash,
+		       md->ima.active_table.hash_len);
+		l += md->ima.active_table.hash_len;
+
+		memcpy(device_table_data + l, ",", 1);
+		l++;
+
+		nodata = false;
+	}
+
+	if (md->ima.inactive_table.hash) {
+		memcpy(device_table_data + l, inactive_table_str, inactive_table_len);
+		l += inactive_table_len;
+
+		memcpy(device_table_data + l, md->ima.inactive_table.hash,
+		       md->ima.inactive_table.hash_len);
+		l += md->ima.inactive_table.hash_len;
+
+		memcpy(device_table_data + l, ",", 1);
+		l++;
+
+		nodata = false;
+	}
+	/*
+	 * In case both active and inactive tables, and corresponding
+	 * device metadata is cleared/missing - record the name and uuid
+	 * in IMA measurements.
+	 */
+	if (nodata) {
+		if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
+			goto error;
+
+		scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+			  "%sname=%s,uuid=%s;device_remove=no_data;",
+			  DM_IMA_VERSION_STR, dev_name, dev_uuid);
+		l += strlen(device_table_data);
+	}
+
+	memcpy(device_table_data + l, remove_all_str, remove_all_len);
+	l += remove_all_len;
+
+	memcpy(device_table_data + l, remove_all ?
"y;" : "n;", 2); + l += 2; + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("dm_device_remove", device_table_data, l, noio); + +error: + kfree(device_table_data); + kfree(capacity_str); +exit: + kfree(md->ima.active_table.device_metadata); + + if (md->ima.active_table.device_metadata != + md->ima.inactive_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + kfree(md->ima.active_table.hash); + + if (md->ima.active_table.hash != md->ima.inactive_table.hash) + kfree(md->ima.inactive_table.hash); + + dm_ima_reset_data(md); + + kfree(dev_name); + kfree(dev_uuid); +} + +/* + * Measure ima data on table clear. + */ +void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) +{ + unsigned int l = 0, capacity_len = 0; + char *device_table_data = NULL, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char inactive_str[] = "inactive_table_hash="; + unsigned int inactive_len = strlen(inactive_str); + bool noio = true; + bool nodata = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!device_table_data) + return; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error1; + + memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len); + l += md->ima.dm_version_str_len; + + if (md->ima.inactive_table.device_metadata_len && + md->ima.inactive_table.hash_len) { + memcpy(device_table_data + l, md->ima.inactive_table.device_metadata, + md->ima.inactive_table.device_metadata_len); + l += md->ima.inactive_table.device_metadata_len; + + memcpy(device_table_data + l, inactive_str, inactive_len); + l += inactive_len; + + memcpy(device_table_data + l, md->ima.inactive_table.hash, + md->ima.inactive_table.hash_len); + + l += md->ima.inactive_table.hash_len; + + memcpy(device_table_data + l, ";", 1); + l++; + + nodata = false; + } + + if (nodata) { + if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) + goto error2; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;table_clear=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); + l += strlen(device_table_data); + } + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("dm_table_clear", device_table_data, l, noio); + + if (new_map) { + if (md->ima.inactive_table.hash && + md->ima.inactive_table.hash != md->ima.active_table.hash) + kfree(md->ima.inactive_table.hash); + + md->ima.inactive_table.hash = NULL; + md->ima.inactive_table.hash_len = 0; + + if (md->ima.inactive_table.device_metadata && + md->ima.inactive_table.device_metadata != md->ima.active_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + md->ima.inactive_table.device_metadata = NULL; + md->ima.inactive_table.device_metadata_len = 0; + md->ima.inactive_table.num_targets = 0; + + if (md->ima.active_table.hash) { + md->ima.inactive_table.hash = md->ima.active_table.hash; + md->ima.inactive_table.hash_len = md->ima.active_table.hash_len; + } + + if (md->ima.active_table.device_metadata) { + md->ima.inactive_table.device_metadata = + md->ima.active_table.device_metadata; + md->ima.inactive_table.device_metadata_len = + md->ima.active_table.device_metadata_len; + md->ima.inactive_table.num_targets = + md->ima.active_table.num_targets; + } + } + + kfree(dev_name); + kfree(dev_uuid); +error2: + 
+	kfree(capacity_str);
+error1:
+	kfree(device_table_data);
+}
+
+/*
+ * Measure IMA data on device rename.
+ */
+void dm_ima_measure_on_device_rename(struct mapped_device *md)
+{
+	char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL;
+	char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL;
+	bool noio = true;
+	int r;
+
+	if (dm_ima_alloc_and_copy_device_data(md, &new_device_data,
+					      md->ima.active_table.num_targets, noio))
+		return;
+
+	if (dm_ima_alloc_and_copy_name_uuid(md, &new_dev_name, &new_dev_uuid, noio))
+		goto error;
+
+	combined_device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, GFP_KERNEL, noio);
+	if (!combined_device_data)
+		goto error;
+
+	r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio);
+	if (r)
+		goto error;
+
+	old_device_data = md->ima.active_table.device_metadata;
+
+	md->ima.active_table.device_metadata = new_device_data;
+	md->ima.active_table.device_metadata_len = strlen(new_device_data);
+
+	scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2,
+		  "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data,
+		  new_dev_name, new_dev_uuid, capacity_str);
+
+	dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data),
+			    noio);
+
+	goto exit;
+
+error:
+	kfree(new_device_data);
+exit:
+	kfree(capacity_str);
+	kfree(combined_device_data);
+	kfree(old_device_data);
+	kfree(new_dev_name);
+	kfree(new_dev_uuid);
+}
diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h
new file mode 100644
index 000000000000..b8c3b614670b
--- /dev/null
+++ b/drivers/md/dm-ima.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2021 Microsoft Corporation
+ *
+ * Author: Tushar Sugandhi <tusharsu@linux.microsoft.com>
+ *
+ * File: dm-ima.h
+ *       Header file for device mapper IMA measurements.
+ */
+
+#ifndef DM_IMA_H
+#define DM_IMA_H
+
+#define DM_IMA_MEASUREMENT_BUF_LEN	4096
+#define DM_IMA_DEVICE_BUF_LEN		1024
+#define DM_IMA_TARGET_METADATA_BUF_LEN	128
+#define DM_IMA_TARGET_DATA_BUF_LEN	2048
+#define DM_IMA_DEVICE_CAPACITY_BUF_LEN	128
+#define DM_IMA_TABLE_HASH_ALG		"sha256"
+
+#define __dm_ima_stringify(s) #s
+#define __dm_ima_str(s) __dm_ima_stringify(s)
+
+#define DM_IMA_VERSION_STR "dm_version="	\
+	__dm_ima_str(DM_VERSION_MAJOR) "."	\
+	__dm_ima_str(DM_VERSION_MINOR) "."	\
+	__dm_ima_str(DM_VERSION_PATCHLEVEL) ";"
+
+#ifdef CONFIG_IMA
+
+struct dm_ima_device_table_metadata {
+	/*
+	 * Contains data specific to the device which is common across
+	 * all the targets in the table (e.g. name, uuid, major, minor, etc).
+	 * The values are stored in comma separated list of key1=val1,key2=val2;
+	 * pairs delimited by a semicolon at the end of the list.
+	 */
+	char *device_metadata;
+	unsigned int device_metadata_len;
+	unsigned int num_targets;
+
+	/*
+	 * Contains the sha256 hashes of the IMA measurements of the target
+	 * attributes' key-value pairs from the active/inactive tables.
+	 */
+	char *hash;
+	unsigned int hash_len;
+};
+
+/*
+ * This structure contains device metadata, and table hash for
+ * active and inactive tables for ima measurements.
+ */
+struct dm_ima_measurements {
+	struct dm_ima_device_table_metadata active_table;
+	struct dm_ima_device_table_metadata inactive_table;
+	unsigned int dm_version_str_len;
+};
+
+void dm_ima_reset_data(struct mapped_device *md);
+void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags);
+void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap);
+void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all);
+void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map);
+void dm_ima_measure_on_device_rename(struct mapped_device *md);
+
+#else
+
+static inline void dm_ima_reset_data(struct mapped_device *md) {}
+static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {}
+static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {}
+static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {}
+static inline void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) {}
+static inline void dm_ima_measure_on_device_rename(struct mapped_device *md) {}
+
+#endif /* CONFIG_IMA */
+
+#endif /* DM_IMA_H */
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 20f2510db1f6..dc03b70f6e65 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1819,7 +1819,7 @@ again:
 			unsigned this_len;
 
 			BUG_ON(PageHighMem(biv.bv_page));
-			tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+			tag = bvec_virt(&biv);
 			this_len = min(biv.bv_len, data_to_process);
 			r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
 						this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
@@ -2006,7 +2006,7 @@ retry_kmap:
 				unsigned tag_now = min(biv.bv_len, tag_todo);
 				char *tag_addr;
 				BUG_ON(PageHighMem(biv.bv_page));
-				tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+				tag_addr = bvec_virt(&biv);
 				if (likely(dio->op == REQ_OP_WRITE))
 					memcpy(tag_ptr, tag_addr, tag_now);
 				else
@@ -3306,6 +3306,30 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
 		EMIT_ALG(journal_mac_alg, "journal_mac");
 		break;
 	}
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c",
+		       ic->dev->name, ic->start, ic->tag_size, ic->mode);
+
+		if (ic->meta_dev)
+			DMEMIT(",meta_device=%s", ic->meta_dev->name);
+		if (ic->sectors_per_block != 1)
+			DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT);
+
+		DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ?
+		       'y' : 'n');
+		DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n');
+		DMEMIT(",fix_padding=%c",
+		       ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n');
+		DMEMIT(",fix_hmac=%c",
+		       ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n');
+		DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ?
+		       'y' : 'n');
+
+		DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS);
+		DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors);
+		DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors);
+		DMEMIT(";");
+		break;
 	}
 }
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 2209cbcd84db..21fe8652b095 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -6,7 +6,7 @@
  */
 
 #include "dm-core.h"
-
+#include "dm-ima.h"
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/miscdevice.h>
@@ -20,6 +20,7 @@
 #include <linux/compat.h>
 
 #include <linux/uaccess.h>
+#include <linux/ima.h>
 
 #define DM_MSG_PREFIX "ioctl"
 #define DM_DRIVER_EMAIL "dm-devel@redhat.com"
@@ -347,6 +348,7 @@ retry:
 			dm_sync_table(md);
 			dm_table_destroy(t);
 		}
+		dm_ima_measure_on_device_remove(md, true);
 		dm_put(md);
 		if (likely(keep_open_devices))
 			dm_destroy(md);
@@ -483,6 +485,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
 		param->flags |= DM_UEVENT_GENERATED_FLAG;
 
 	md = hc->md;
+
+	dm_ima_measure_on_device_rename(md);
+
 	up_write(&_hash_lock);
 	kfree(old_name);
 
@@ -981,6 +986,8 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si
 
 	param->flags &= ~DM_DEFERRED_REMOVE;
 
+	dm_ima_measure_on_device_remove(md, false);
+
 	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
 		param->flags |= DM_UEVENT_GENERATED_FLAG;
 
@@ -1159,8 +1166,12 @@ static int do_resume(struct dm_ioctl *param)
 
 	if (dm_suspended_md(md)) {
 		r = dm_resume(md);
-		if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
-			param->flags |= DM_UEVENT_GENERATED_FLAG;
+		if (!r) {
+			dm_ima_measure_on_device_resume(md, new_map ? true : false);
+
+			if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
+				param->flags |= DM_UEVENT_GENERATED_FLAG;
+		}
 	}
 
 	/*
@@ -1224,6 +1235,8 @@ static void retrieve_status(struct dm_table *table,
 
 		if (param->flags & DM_STATUS_TABLE_FLAG)
 			type = STATUSTYPE_TABLE;
+		else if (param->flags & DM_IMA_MEASUREMENT_FLAG)
+			type = STATUSTYPE_IMA;
 		else
 			type = STATUSTYPE_INFO;
 
@@ -1425,6 +1438,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
 	if (r)
 		goto err_unlock_md_type;
 
+	dm_ima_measure_on_table_load(t, STATUSTYPE_IMA);
+
 	immutable_target_type = dm_get_immutable_target_type(md);
 	if (immutable_target_type &&
 	    (immutable_target_type != dm_table_get_immutable_target_type(t)) &&
@@ -1436,9 +1451,6 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
 	}
 
 	if (dm_get_md_type(md) == DM_TYPE_NONE) {
-		/* Initial table load: acquire type of table. */
-		dm_set_md_type(md, dm_table_get_type(t));
-
 		/* setup md->queue to reflect md's type (may block) */
 		r = dm_setup_md_queue(md, t);
 		if (r) {
@@ -1496,6 +1508,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
 	struct hash_cell *hc;
 	struct mapped_device *md;
 	struct dm_table *old_map = NULL;
+	bool has_new_map = false;
 
 	down_write(&_hash_lock);
 
@@ -1509,6 +1522,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
 	if (hc->new_map) {
 		old_map = hc->new_map;
 		hc->new_map = NULL;
+		has_new_map = true;
 	}
 
 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
@@ -1520,6 +1534,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
 		dm_sync_table(md);
 		dm_table_destroy(old_map);
 	}
+	dm_ima_measure_on_table_clear(md, has_new_map);
 	dm_put(md);
 
 	return 0;
@@ -2187,7 +2202,6 @@ int __init dm_early_create(struct dm_ioctl *dmi,
 	if (r)
 		goto err_destroy_table;
 
-	md->type = dm_table_get_type(t);
 	/* setup md->queue to reflect md's type (may block) */
 	r = dm_setup_md_queue(md, t);
 	if (r) {
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index c91f1e2e2f65..679b4c0a2eea 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -106,6 +106,7 @@ static void linear_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
+	size_t sz = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -113,8 +114,13 @@ static void linear_status(struct dm_target *ti, status_type_t type,
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%s %llu", lc->dev->name,
-				(unsigned long long)lc->start);
+		DMEMIT("%s %llu", lc->dev->name, (unsigned long long)lc->start);
+		break;
+
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",device_name=%s,start=%llu;", lc->dev->name,
+		       (unsigned long long)lc->start);
 		break;
 	}
 }
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 52090bee17c2..9ab93ebea889 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -820,6 +820,9 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 			DMEMIT("integrated_flush ");
 		DMEMIT("%s ", table_args);
 		break;
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 	return (r) ? 0 : (int)sz;
 }
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 57882654ffee..d93a4db23512 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -834,6 +834,10 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
 	case STATUSTYPE_TABLE:
 		DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
 		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 }
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 33e71ea6cc14..1ecf75ef276a 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -793,6 +793,11 @@ static int core_status(struct dm_dirty_log *log, status_type_t status,
 		DMEMIT("%s %u %u ", log->type->name,
 		       lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size);
 		DMEMIT_SYNC;
+		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 
 	return sz;
@@ -817,6 +822,11 @@ static int disk_status(struct dm_dirty_log *log, status_type_t status,
 		DMEMIT("%s %u %s %u ", log->type->name,
 		       lc->sync == DEFAULTSYNC ?
 		       2 : 3, lc->log_dev->name,
 		       lc->region_size);
 		DMEMIT_SYNC;
+		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 
 	return sz;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index bced42f082b0..694aaca4eea2 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1790,7 +1790,7 @@ static void multipath_resume(struct dm_target *ti)
 static void multipath_status(struct dm_target *ti, status_type_t type,
 			     unsigned status_flags, char *result, unsigned maxlen)
 {
-	int sz = 0;
+	int sz = 0, pg_counter, pgpath_counter;
 	unsigned long flags;
 	struct multipath *m = ti->private;
 	struct priority_group *pg;
@@ -1904,6 +1904,44 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			}
 		}
 		break;
+
+	case STATUSTYPE_IMA:
+		sz = 0; /*reset the result pointer*/
+
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",nr_priority_groups=%u", m->nr_priority_groups);
+
+		pg_counter = 0;
+		list_for_each_entry(pg, &m->priority_groups, list) {
+			if (pg->bypassed)
+				state = 'D';	/* Disabled */
+			else if (pg == m->current_pg)
+				state = 'A';	/* Currently Active */
+			else
+				state = 'E';	/* Enabled */
+			DMEMIT(",pg_state_%d=%c", pg_counter, state);
+			DMEMIT(",nr_pgpaths_%d=%u", pg_counter, pg->nr_pgpaths);
+			DMEMIT(",path_selector_name_%d=%s", pg_counter, pg->ps.type->name);
+
+			pgpath_counter = 0;
+			list_for_each_entry(p, &pg->pgpaths, list) {
+				DMEMIT(",path_name_%d_%d=%s,is_active_%d_%d=%c,fail_count_%d_%d=%u",
+				       pg_counter, pgpath_counter, p->path.dev->name,
+				       pg_counter, pgpath_counter, p->is_active ? 'A' : 'F',
+				       pg_counter, pgpath_counter, p->fail_count);
+				if (pg->ps.type->status) {
+					DMEMIT(",path_selector_status_%d_%d=",
+					       pg_counter, pgpath_counter);
+					sz += pg->ps.type->status(&pg->ps, &p->path,
+								  type, result + sz,
+								  maxlen - sz);
+				}
+				pgpath_counter++;
+			}
+			pg_counter++;
+		}
+		DMEMIT(";");
+		break;
 	}
 
 	spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index 186f91e2752c..1856a1b125cc 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -255,6 +255,9 @@ static int hst_status(struct path_selector *ps, struct dm_path *path,
 		case STATUSTYPE_TABLE:
 			DMEMIT("0 ");
 			break;
+		case STATUSTYPE_IMA:
+			*result = '\0';
+			break;
 		}
 	}
 
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index cb8e83bfb1a7..f74501e65a8e 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -170,6 +170,9 @@ static int ioa_status(struct path_selector *ps, struct dm_path *path,
 		pi = path->pscontext;
 		DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
 		break;
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 
 	return sz;
diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c
index 5fd018d18418..cef70657bbbc 100644
--- a/drivers/md/dm-ps-queue-length.c
+++ b/drivers/md/dm-ps-queue-length.c
@@ -102,6 +102,9 @@ static int ql_status(struct path_selector *ps, struct dm_path *path,
 		case STATUSTYPE_TABLE:
 			DMEMIT("%u ", pi->repeat_count);
 			break;
+		case STATUSTYPE_IMA:
+			*result = '\0';
+			break;
 		}
 	}
 
diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c
index bdbb7e6e8212..27f44c5fa04e 100644
--- a/drivers/md/dm-ps-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
@@ -100,6 +100,10 @@ static int rr_status(struct path_selector *ps, struct dm_path *path,
 			pi = path->pscontext;
 			DMEMIT("%u ", pi->repeat_count);
 			break;
+
+		case STATUSTYPE_IMA:
+			*result = '\0';
+			break;
 		}
 	}
 
diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c
index 9cfda665e9eb..3ec9c33265c5 100644
--- a/drivers/md/dm-ps-service-time.c
+++ b/drivers/md/dm-ps-service-time.c
@@ -99,6 +99,9 @@ static int st_status(struct path_selector *ps, struct dm_path *path,
 			DMEMIT("%u %u ", pi->repeat_count,
 			       pi->relative_throughput);
 			break;
+		case STATUSTYPE_IMA:
+			result[0] = '\0';
+			break;
 		}
 	}
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index bf4a467fc73a..d9ef52159a22 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3671,6 +3671,45 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		for (i = 0; i < rs->raid_disks; i++)
 			DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
 			       __get_dev_name(rs->dev[i].data_dev));
+		break;
+
+	case STATUSTYPE_IMA:
+		rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
+		if (!rt)
+			return;
+
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",raid_type=%s,raid_disks=%d", rt->name, mddev->raid_disks);
+
+		/* Access most recent mddev properties for status output */
+		smp_rmb();
+		recovery = rs->md.recovery;
+		state = decipher_sync_action(mddev, recovery);
+		DMEMIT(",raid_state=%s", sync_str(state));
+
+		for (i = 0; i < rs->raid_disks; i++) {
+			DMEMIT(",raid_device_%d_status=", i);
+			DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev));
+		}
+
+		if (rt_is_raid456(rt)) {
+			DMEMIT(",journal_dev_mode=");
+			switch (rs->journal_dev.mode) {
+			case R5C_JOURNAL_MODE_WRITE_THROUGH:
+				DMEMIT("%s",
+				       _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_THROUGH].param);
+				break;
+			case R5C_JOURNAL_MODE_WRITE_BACK:
+				DMEMIT("%s",
+				       _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_BACK].param);
+				break;
+			default:
+				DMEMIT("invalid");
+				break;
+			}
+		}
+		DMEMIT(";");
+		break;
 	}
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ebb4810cc3b4..8811d484fdd1 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1435,6 +1435,23 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
 		}
 		break;
+
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",nr_mirrors=%d", ms->nr_mirrors);
+		for (m = 0; m < ms->nr_mirrors; m++) {
+			DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name);
+			DMEMIT(",mirror_device_%d_status=%c",
+			       m, device_status_char(&(ms->mirror[m])));
+		}
+
+		DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n');
+		DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n');
+
+		DMEMIT(",log_type_status=");
+		sz += log->type->status(log, type, result+sz, maxlen-sz);
+		DMEMIT(";");
+		break;
 	}
 }
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 0dbd48cbdff9..5b95eea517d1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -559,7 +559,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
 	err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
 	if (err)
 		goto out_tag_set;
-	elevator_init_mq(md->queue);
 	return 0;
 
 out_tag_set:
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 9ab4bf651ca9..3bb5cff5d6fc 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -908,6 +908,10 @@ static unsigned persistent_status(struct dm_exception_store *store,
 	case STATUSTYPE_TABLE:
 		DMEMIT(" %s %llu", store->userspace_supports_overflow ?
"PO" : "P", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 4d50a12cf00c..0e0ae4c36b37 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -95,6 +95,10 @@ static unsigned transient_status(struct dm_exception_store *store, break; case STATUSTYPE_TABLE: DMEMIT(" N %llu", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 751ec5ea1dbb..dcf34c6b05ad 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2390,6 +2390,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT(" discard_passdown_origin"); } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",snap_origin_name=%s", snap->origin->name); + DMEMIT(",snap_cow_name=%s", snap->cow->name); + DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n'); + DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n'); + DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n'); + DMEMIT(";"); + break; } } @@ -2734,6 +2744,9 @@ static void origin_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: snprintf(result, maxlen, "%s", o->dev->name); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index df359d33cda8..6660b6b53d5b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -428,6 +428,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes, + (unsigned long long)sc->chunk_size); + + for (i = 0; i < sc->stripes; i++) { + DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name); + DMEMIT(",stripe_%d_physical_start=%llu", i, + (unsigned long long)sc->stripe[i].physical_start); + DMEMIT(",stripe_%d_status=%c", i, + atomic_read(&(sc->stripe[i].error_count)) ? 
+			       'D' : 'A');
+		}
+		DMEMIT(";");
+		break;
 	}
 }
 
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 262e2b0fd975..028a92ff6d57 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -504,6 +504,10 @@ static void switch_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
 			       (unsigned long long)sctx->path_list[path_nr].start);
 		break;
+
+	case STATUSTYPE_IMA:
+		result[0] = '\0';
+		break;
 	}
 }
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0543cdf89e92..b03eabc1ed7c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2076,7 +2076,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 
 	dm_update_keyslot_manager(q, t);
-	blk_queue_update_readahead(q);
+	disk_update_readahead(t->md->disk);
 
 	return 0;
 }
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 985baee3a678..4c67b77c23c1 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4012,6 +4012,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long)pt->low_water_blocks);
 		emit_flags(&pt->requested_pf, result, sz, maxlen);
 		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 	return;
 
@@ -4423,6 +4427,10 @@ static void thin_status(struct dm_target *ti, status_type_t type,
 			if (tc->origin_dev)
 				DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
 			break;
+
+		case STATUSTYPE_IMA:
+			*result = '\0';
+			break;
 		}
 	}
 
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 7357c1bd5863..fdc8921e5c19 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -156,6 +156,10 @@ static void unstripe_status(struct dm_target *ti, status_type_t type,
 		       uc->stripes, (unsigned long long)uc->chunk_size, uc->unstripe,
 		       uc->dev->name, (unsigned long long)uc->physical_start);
 		break;
+
+	case STATUSTYPE_IMA:
+		*result = '\0';
+		break;
 	}
 }
 
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index d3e76aefc1a6..22a5ac82446a 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -772,6 +772,49 @@ static void verity_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" " DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY " %s",
 			       v->signature_key_desc);
 		break;
+
+	case STATUSTYPE_IMA:
+		DMEMIT_TARGET_NAME_VERSION(ti->type);
+		DMEMIT(",hash_failed=%c", v->hash_failed ? 'C' : 'V');
+		DMEMIT(",verity_version=%u", v->version);
+		DMEMIT(",data_device_name=%s", v->data_dev->name);
+		DMEMIT(",hash_device_name=%s", v->hash_dev->name);
+		DMEMIT(",verity_algorithm=%s", v->alg_name);
+
+		DMEMIT(",root_digest=");
+		for (x = 0; x < v->digest_size; x++)
+			DMEMIT("%02x", v->root_digest[x]);
+
+		DMEMIT(",salt=");
+		if (!v->salt_size)
+			DMEMIT("-");
+		else
+			for (x = 0; x < v->salt_size; x++)
+				DMEMIT("%02x", v->salt[x]);
+
+		DMEMIT(",ignore_zero_blocks=%c", v->zero_digest ? 'y' : 'n');
+		DMEMIT(",check_at_most_once=%c", v->validated_blocks ?
'y' : 'n'); + if (v->signature_key_desc) + DMEMIT(",root_hash_sig_key_desc=%s", v->signature_key_desc); + + if (v->mode != DM_VERITY_MODE_EIO) { + DMEMIT(",verity_mode="); + switch (v->mode) { + case DM_VERITY_MODE_LOGGING: + DMEMIT(DM_VERITY_OPT_LOGGING); + break; + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_RESTART); + break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_PANIC); + break; + default: + DMEMIT("invalid"); + } + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index e21e29e81bbf..18320444fb0a 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -206,6 +206,19 @@ struct dm_writecache { struct bio_set bio_set; mempool_t copy_pool; + + struct { + unsigned long long reads; + unsigned long long read_hits; + unsigned long long writes; + unsigned long long write_hits_uncommitted; + unsigned long long write_hits_committed; + unsigned long long writes_around; + unsigned long long writes_allocate; + unsigned long long writes_blocked_on_freelist; + unsigned long long flushes; + unsigned long long discards; + } stats; }; #define WB_LIST_INLINE 16 @@ -1157,6 +1170,18 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache return 0; } +static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +{ + if (argc != 1) + return -EINVAL; + + wc_lock(wc); + memset(&wc->stats, 0, sizeof wc->stats); + wc_unlock(wc); + + return 0; +} + static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, char *result, unsigned maxlen) { @@ -1169,6 +1194,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, r = process_flush_on_suspend_mesg(argc, argv, wc); else if (!strcasecmp(argv[0], "cleaner")) r = process_cleaner_mesg(argc, argv, wc); + else if (!strcasecmp(argv[0], "clear_stats")) + r = process_clear_stats_mesg(argc, argv, wc); else DMERR("unrecognised message received: %s", argv[0]); @@ -1214,14 +1241,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size) static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) { void *buf; - unsigned long flags; unsigned size; int rw = bio_data_dir(bio); unsigned remaining_size = wc->block_size; do { struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); - buf = bvec_kmap_irq(&bv, &flags); + buf = bvec_kmap_local(&bv); size = bv.bv_len; if (unlikely(size > remaining_size)) size = remaining_size; @@ -1239,7 +1265,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data memcpy_flushcache_optimized(data, buf, size); } - bvec_kunmap_irq(buf, &flags); + kunmap_local(buf); data = (char *)data + size; remaining_size -= size; @@ -1294,216 +1320,278 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) bio_list_add(&wc->flush_list, bio); } -static int writecache_map(struct dm_target *ti, struct bio *bio) +enum wc_map_op { + WC_MAP_SUBMIT, + WC_MAP_REMAP, + WC_MAP_REMAP_ORIGIN, + WC_MAP_RETURN, + WC_MAP_ERROR, +}; + +static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e) { - struct wc_entry *e; - struct dm_writecache *wc = ti->private; + if (e) { + sector_t next_boundary = + read_original_sector(wc, e) - bio->bi_iter.bi_sector; + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) + dm_accept_partial_bio(bio, next_boundary); + } - bio->bi_private = NULL; + return WC_MAP_REMAP_ORIGIN; +} - wc_lock(wc); +static 
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index e21e29e81bbf..18320444fb0a 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -206,6 +206,19 @@ struct dm_writecache {
 
     struct bio_set bio_set;
     mempool_t copy_pool;
+
+    struct {
+        unsigned long long reads;
+        unsigned long long read_hits;
+        unsigned long long writes;
+        unsigned long long write_hits_uncommitted;
+        unsigned long long write_hits_committed;
+        unsigned long long writes_around;
+        unsigned long long writes_allocate;
+        unsigned long long writes_blocked_on_freelist;
+        unsigned long long flushes;
+        unsigned long long discards;
+    } stats;
 };
 
 #define WB_LIST_INLINE 16
@@ -1157,6 +1170,18 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache
     return 0;
 }
 
+static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+    if (argc != 1)
+        return -EINVAL;
+
+    wc_lock(wc);
+    memset(&wc->stats, 0, sizeof wc->stats);
+    wc_unlock(wc);
+
+    return 0;
+}
+
 static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
                   char *result, unsigned maxlen)
 {
@@ -1169,6 +1194,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
         r = process_flush_on_suspend_mesg(argc, argv, wc);
     else if (!strcasecmp(argv[0], "cleaner"))
         r = process_cleaner_mesg(argc, argv, wc);
+    else if (!strcasecmp(argv[0], "clear_stats"))
+        r = process_clear_stats_mesg(argc, argv, wc);
     else
         DMERR("unrecognised message received: %s", argv[0]);
 
@@ -1214,14 +1241,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
 static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
 {
     void *buf;
-    unsigned long flags;
     unsigned size;
     int rw = bio_data_dir(bio);
     unsigned remaining_size = wc->block_size;
 
     do {
         struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
-        buf = bvec_kmap_irq(&bv, &flags);
+        buf = bvec_kmap_local(&bv);
         size = bv.bv_len;
         if (unlikely(size > remaining_size))
             size = remaining_size;
@@ -1239,7 +1265,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
             memcpy_flushcache_optimized(data, buf, size);
         }
 
-        bvec_kunmap_irq(buf, &flags);
+        kunmap_local(buf);
 
         data = (char *)data + size;
         remaining_size -= size;
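The new counters are only ever touched under wc_lock(), and the clear_stats message resets them with a single memset() under the same lock, so a concurrent status read never observes a half-cleared set. A userspace analogue of that discipline, with a pthread mutex standing in for wc_lock() (build with -lpthread; all names here are illustrative):

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    /* Analogue of wc->stats: every update and the "clear_stats" reset
     * happen under one lock, so readers see a consistent snapshot. */
    struct cache_stats {
        unsigned long long reads, read_hits, writes, flushes, discards;
    };

    static struct cache_stats stats;
    static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;

    static void account_read(int hit)
    {
        pthread_mutex_lock(&stats_lock);
        stats.reads++;
        if (hit)
            stats.read_hits++;
        pthread_mutex_unlock(&stats_lock);
    }

    static void clear_stats(void)
    {
        pthread_mutex_lock(&stats_lock);
        memset(&stats, 0, sizeof(stats));   /* mirrors the hunk above */
        pthread_mutex_unlock(&stats_lock);
    }

    int main(void)
    {
        account_read(1);
        account_read(0);
        printf("reads=%llu read_hits=%llu\n", stats.reads, stats.read_hits);
        clear_stats();
        printf("after clear: reads=%llu\n", stats.reads);
        return 0;
    }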
@@ -1294,216 +1320,278 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
     bio_list_add(&wc->flush_list, bio);
 }
 
-static int writecache_map(struct dm_target *ti, struct bio *bio)
+enum wc_map_op {
+    WC_MAP_SUBMIT,
+    WC_MAP_REMAP,
+    WC_MAP_REMAP_ORIGIN,
+    WC_MAP_RETURN,
+    WC_MAP_ERROR,
+};
+
+static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+                          struct wc_entry *e)
 {
-    struct wc_entry *e;
-    struct dm_writecache *wc = ti->private;
+    if (e) {
+        sector_t next_boundary =
+            read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+        if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
+            dm_accept_partial_bio(bio, next_boundary);
+    }
 
-    bio->bi_private = NULL;
+    return WC_MAP_REMAP_ORIGIN;
+}
 
-    wc_lock(wc);
+static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
+{
+    enum wc_map_op map_op;
+    struct wc_entry *e;
 
-    if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-        if (writecache_has_error(wc))
-            goto unlock_error;
+read_next_block:
+    wc->stats.reads++;
+    e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+    if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
+        wc->stats.read_hits++;
         if (WC_MODE_PMEM(wc)) {
-            writecache_flush(wc);
-            if (writecache_has_error(wc))
-                goto unlock_error;
-            if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
-                goto unlock_remap_origin;
-            goto unlock_submit;
+            bio_copy_block(wc, bio, memory_data(wc, e));
+            if (bio->bi_iter.bi_size)
+                goto read_next_block;
+            map_op = WC_MAP_SUBMIT;
         } else {
-            if (dm_bio_get_target_bio_nr(bio))
-                goto unlock_remap_origin;
-            writecache_offload_bio(wc, bio);
-            goto unlock_return;
+            dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+            bio_set_dev(bio, wc->ssd_dev->bdev);
+            bio->bi_iter.bi_sector = cache_sector(wc, e);
+            if (!writecache_entry_is_committed(wc, e))
+                writecache_wait_for_ios(wc, WRITE);
+            map_op = WC_MAP_REMAP;
         }
+    } else {
+        map_op = writecache_map_remap_origin(wc, bio, e);
     }
 
-    bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+    return map_op;
+}
 
-    if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
-                (wc->block_size / 512 - 1)) != 0)) {
-        DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
-            (unsigned long long)bio->bi_iter.bi_sector,
-            bio->bi_iter.bi_size, wc->block_size);
-        goto unlock_error;
-    }
+static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+                          struct wc_entry *e, bool search_used)
+{
+    unsigned bio_size = wc->block_size;
+    sector_t start_cache_sec = cache_sector(wc, e);
+    sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
 
-    if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
-        if (writecache_has_error(wc))
-            goto unlock_error;
-        if (WC_MODE_PMEM(wc)) {
-            writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
-            goto unlock_remap_origin;
+    while (bio_size < bio->bi_iter.bi_size) {
+        if (!search_used) {
+            struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+            if (!f)
+                break;
+            write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
+                            (bio_size >> SECTOR_SHIFT), wc->seq_count);
+            writecache_insert_entry(wc, f);
+            wc->uncommitted_blocks++;
         } else {
-            writecache_offload_bio(wc, bio);
-            goto unlock_return;
+            struct wc_entry *f;
+            struct rb_node *next = rb_next(&e->rb_node);
+            if (!next)
+                break;
+            f = container_of(next, struct wc_entry, rb_node);
+            if (f != e + 1)
+                break;
+            if (read_original_sector(wc, f) !=
+                read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+                break;
+            if (unlikely(f->write_in_progress))
+                break;
+            if (writecache_entry_is_committed(wc, f))
+                wc->overwrote_committed = true;
+            e = f;
         }
+        bio_size += wc->block_size;
+        current_cache_sec += wc->block_size >> SECTOR_SHIFT;
     }
 
-    if (bio_data_dir(bio) == READ) {
-read_next_block:
-        e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
-        if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
-            if (WC_MODE_PMEM(wc)) {
-                bio_copy_block(wc, bio, memory_data(wc, e));
-                if (bio->bi_iter.bi_size)
-                    goto read_next_block;
-                goto unlock_submit;
-            } else {
-                dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
-                bio_set_dev(bio, wc->ssd_dev->bdev);
-                bio->bi_iter.bi_sector = cache_sector(wc, e);
-                if (!writecache_entry_is_committed(wc, e))
-                    writecache_wait_for_ios(wc, WRITE);
-                goto unlock_remap;
+    bio_set_dev(bio, wc->ssd_dev->bdev);
+    bio->bi_iter.bi_sector = start_cache_sec;
+    dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+
+    if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+        wc->uncommitted_blocks = 0;
+        queue_work(wc->writeback_wq, &wc->flush_work);
+    } else {
+        writecache_schedule_autocommit(wc);
+    }
+
+    return WC_MAP_REMAP;
+}
+
+static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
+{
+    struct wc_entry *e;
+
+    do {
+        bool found_entry = false;
+        bool search_used = false;
+        wc->stats.writes++;
+        if (writecache_has_error(wc))
+            return WC_MAP_ERROR;
+        e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+        if (e) {
+            if (!writecache_entry_is_committed(wc, e)) {
+                wc->stats.write_hits_uncommitted++;
+                search_used = true;
+                goto bio_copy;
             }
-        } else {
-            if (e) {
-                sector_t next_boundary =
-                    read_original_sector(wc, e) - bio->bi_iter.bi_sector;
-                if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
-                    dm_accept_partial_bio(bio, next_boundary);
-                }
+            wc->stats.write_hits_committed++;
+            if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
+                wc->overwrote_committed = true;
+                search_used = true;
+                goto bio_copy;
             }
-            goto unlock_remap_origin;
+            found_entry = true;
+        } else {
+            if (unlikely(wc->cleaner) ||
+                (wc->metadata_only && !(bio->bi_opf & REQ_META)))
+                goto direct_write;
         }
-    } else {
-        do {
-            bool found_entry = false;
-            bool search_used = false;
-            if (writecache_has_error(wc))
-                goto unlock_error;
-            e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
-            if (e) {
-                if (!writecache_entry_is_committed(wc, e)) {
-                    search_used = true;
-                    goto bio_copy;
-                }
-                if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
-                    wc->overwrote_committed = true;
-                    search_used = true;
-                    goto bio_copy;
-                }
-                found_entry = true;
-            } else {
-                if (unlikely(wc->cleaner) ||
-                    (wc->metadata_only && !(bio->bi_opf & REQ_META)))
-                    goto direct_write;
-            }
-            e = writecache_pop_from_freelist(wc, (sector_t)-1);
-            if (unlikely(!e)) {
-                if (!WC_MODE_PMEM(wc) && !found_entry) {
+        e = writecache_pop_from_freelist(wc, (sector_t)-1);
+        if (unlikely(!e)) {
+            if (!WC_MODE_PMEM(wc) && !found_entry) {
 direct_write:
-                    e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
-                    if (e) {
-                        sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector;
-                        BUG_ON(!next_boundary);
-                        if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
-                            dm_accept_partial_bio(bio, next_boundary);
-                        }
-                    }
-                    goto unlock_remap_origin;
-                }
-                writecache_wait_on_freelist(wc);
-                continue;
+                wc->stats.writes_around++;
+                e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+                return writecache_map_remap_origin(wc, bio, e);
             }
-            write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
-            writecache_insert_entry(wc, e);
-            wc->uncommitted_blocks++;
+            wc->stats.writes_blocked_on_freelist++;
+            writecache_wait_on_freelist(wc);
+            continue;
+        }
+        write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
+        writecache_insert_entry(wc, e);
+        wc->uncommitted_blocks++;
+        wc->stats.writes_allocate++;
 bio_copy:
-            if (WC_MODE_PMEM(wc)) {
-                bio_copy_block(wc, bio, memory_data(wc, e));
-            } else {
-                unsigned bio_size = wc->block_size;
-                sector_t start_cache_sec = cache_sector(wc, e);
-                sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
-
-                while (bio_size < bio->bi_iter.bi_size) {
-                    if (!search_used) {
-                        struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
-                        if (!f)
-                            break;
-                        write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
-                                        (bio_size >> SECTOR_SHIFT), wc->seq_count);
-                        writecache_insert_entry(wc, f);
-                        wc->uncommitted_blocks++;
-                    } else {
-                        struct wc_entry *f;
-                        struct rb_node *next = rb_next(&e->rb_node);
-                        if (!next)
-                            break;
-                        f = container_of(next, struct wc_entry, rb_node);
-                        if (f != e + 1)
-                            break;
-                        if (read_original_sector(wc, f) !=
-                            read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
-                            break;
-                        if (unlikely(f->write_in_progress))
-                            break;
-                        if (writecache_entry_is_committed(wc, f))
-                            wc->overwrote_committed = true;
-                        e = f;
-                    }
-                    bio_size += wc->block_size;
-                    current_cache_sec += wc->block_size >> SECTOR_SHIFT;
-                }
+        if (WC_MODE_PMEM(wc))
+            bio_copy_block(wc, bio, memory_data(wc, e));
+        else
+            return writecache_bio_copy_ssd(wc, bio, e, search_used);
+    } while (bio->bi_iter.bi_size);
 
-                bio_set_dev(bio, wc->ssd_dev->bdev);
-                bio->bi_iter.bi_sector = start_cache_sec;
-                dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+    if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
+        writecache_flush(wc);
+    else
+        writecache_schedule_autocommit(wc);
 
-                if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
-                    wc->uncommitted_blocks = 0;
-                    queue_work(wc->writeback_wq, &wc->flush_work);
-                } else {
-                    writecache_schedule_autocommit(wc);
-                }
-                goto unlock_remap;
-            }
-        } while (bio->bi_iter.bi_size);
+    return WC_MAP_SUBMIT;
+}
 
-        if (unlikely(bio->bi_opf & REQ_FUA ||
-                 wc->uncommitted_blocks >= wc->autocommit_blocks))
-            writecache_flush(wc);
-        else
-            writecache_schedule_autocommit(wc);
-        goto unlock_submit;
+static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio)
+{
+    if (writecache_has_error(wc))
+        return WC_MAP_ERROR;
+
+    if (WC_MODE_PMEM(wc)) {
+        wc->stats.flushes++;
+        writecache_flush(wc);
+        if (writecache_has_error(wc))
+            return WC_MAP_ERROR;
+        else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
+            return WC_MAP_REMAP_ORIGIN;
+        return WC_MAP_SUBMIT;
     }
+    /* SSD: */
+    if (dm_bio_get_target_bio_nr(bio))
+        return WC_MAP_REMAP_ORIGIN;
+    wc->stats.flushes++;
+    writecache_offload_bio(wc, bio);
+    return WC_MAP_RETURN;
+}
 
-unlock_remap_origin:
-    if (likely(wc->pause != 0)) {
-        if (bio_op(bio) == REQ_OP_WRITE) {
-            dm_iot_io_begin(&wc->iot, 1);
-            bio->bi_private = (void *)2;
-        }
+static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
+{
+    wc->stats.discards++;
+
+    if (writecache_has_error(wc))
+        return WC_MAP_ERROR;
+
+    if (WC_MODE_PMEM(wc)) {
+        writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
+        return WC_MAP_REMAP_ORIGIN;
     }
-    bio_set_dev(bio, wc->dev->bdev);
-    wc_unlock(wc);
-    return DM_MAPIO_REMAPPED;
+    /* SSD: */
+    writecache_offload_bio(wc, bio);
+    return WC_MAP_RETURN;
+}
 
-unlock_remap:
-    /* make sure that writecache_end_io decrements bio_in_progress: */
-    bio->bi_private = (void *)1;
-    atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
-    wc_unlock(wc);
-    return DM_MAPIO_REMAPPED;
+static int writecache_map(struct dm_target *ti, struct bio *bio)
+{
+    struct dm_writecache *wc = ti->private;
+    enum wc_map_op map_op;
 
-unlock_submit:
-    wc_unlock(wc);
-    bio_endio(bio);
-    return DM_MAPIO_SUBMITTED;
+    bio->bi_private = NULL;
 
-unlock_return:
-    wc_unlock(wc);
-    return DM_MAPIO_SUBMITTED;
+    wc_lock(wc);
 
-unlock_error:
-    wc_unlock(wc);
-    bio_io_error(bio);
-    return DM_MAPIO_SUBMITTED;
+    if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+        map_op = writecache_map_flush(wc, bio);
+        goto done;
+    }
+
+    bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+    if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+                (wc->block_size / 512 - 1)) != 0)) {
+        DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
+            (unsigned long long)bio->bi_iter.bi_sector,
+            bio->bi_iter.bi_size, wc->block_size);
+        map_op = WC_MAP_ERROR;
+        goto done;
+    }
+
+    if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+        map_op = writecache_map_discard(wc, bio);
+        goto done;
+    }
+
+    if (bio_data_dir(bio) == READ)
+        map_op = writecache_map_read(wc, bio);
+    else
+        map_op = writecache_map_write(wc, bio);
+done:
+    switch (map_op) {
+    case WC_MAP_REMAP_ORIGIN:
+        if (likely(wc->pause != 0)) {
+            if (bio_op(bio) == REQ_OP_WRITE) {
+                dm_iot_io_begin(&wc->iot, 1);
+                bio->bi_private = (void *)2;
+            }
+        }
+        bio_set_dev(bio, wc->dev->bdev);
+        wc_unlock(wc);
+        return DM_MAPIO_REMAPPED;
+
+    case WC_MAP_REMAP:
+        /* make sure that writecache_end_io decrements bio_in_progress: */
+        bio->bi_private = (void *)1;
+        atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
+        wc_unlock(wc);
+        return DM_MAPIO_REMAPPED;
+
+    case WC_MAP_SUBMIT:
+        wc_unlock(wc);
+        bio_endio(bio);
+        return DM_MAPIO_SUBMITTED;
+
+    case WC_MAP_RETURN:
+        wc_unlock(wc);
+        return DM_MAPIO_SUBMITTED;
+
+    case WC_MAP_ERROR:
+        wc_unlock(wc);
+        bio_io_error(bio);
+        return DM_MAPIO_SUBMITTED;
+
+    default:
+        BUG();
+        return -1;
+    }
 }
 
 static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
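The heart of this refactoring: the old writecache_map() funneled every outcome through a pile of unlock_* labels, while the new code has small helpers classify the bio and return a wc_map_op action that a single switch at the end applies. A compilable sketch of that shape; the io struct and handler bodies are invented, and only the control-flow pattern mirrors the hunk above:

    #include <stdio.h>

    /* The kernel labels (unlock_submit etc.) become enum values. */
    enum map_op { MAP_SUBMIT, MAP_REMAP, MAP_REMAP_ORIGIN, MAP_RETURN, MAP_ERROR };

    struct io { int is_flush, is_discard, is_read, hit; };

    static enum map_op map_flush(void)   { return MAP_SUBMIT; }
    static enum map_op map_discard(void) { return MAP_REMAP_ORIGIN; }
    static enum map_op map_read(const struct io *io)
    {
        return io->hit ? MAP_REMAP : MAP_REMAP_ORIGIN;
    }

    static const char *map(const struct io *io)
    {
        enum map_op op;

        if (io->is_flush)
            op = map_flush();
        else if (io->is_discard)
            op = map_discard();
        else if (io->is_read)
            op = map_read(io);
        else
            op = MAP_ERROR;     /* write path elided in this sketch */

        switch (op) {   /* one exit point, like the done: switch above */
        case MAP_SUBMIT:       return "submit";
        case MAP_REMAP:        return "remap to cache";
        case MAP_REMAP_ORIGIN: return "remap to origin";
        case MAP_RETURN:       return "already handled";
        default:               return "error";
        }
    }

    int main(void)
    {
        struct io rd_hit = { .is_read = 1, .hit = 1 };
        printf("read hit -> %s\n", map(&rd_hit));
        return 0;
    }

Centralizing the unlock/completion in one switch means each helper can return early from any depth without risking an unbalanced wc_unlock().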
@@ -2569,9 +2657,20 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
 
     switch (type) {
     case STATUSTYPE_INFO:
-        DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc),
+        DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
+               writecache_has_error(wc),
                (unsigned long long)wc->n_blocks,
                (unsigned long long)wc->freelist_size,
-               (unsigned long long)wc->writeback_size);
+               (unsigned long long)wc->writeback_size,
+               wc->stats.reads,
+               wc->stats.read_hits,
+               wc->stats.writes,
+               wc->stats.write_hits_uncommitted,
+               wc->stats.write_hits_committed,
+               wc->stats.writes_around,
+               wc->stats.writes_allocate,
+               wc->stats.writes_blocked_on_freelist,
+               wc->stats.flushes,
+               wc->stats.discards);
         break;
     case STATUSTYPE_TABLE:
         DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
@@ -2624,12 +2723,15 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
         if (wc->pause_set)
             DMEMIT(" pause_writeback %u", wc->pause_value);
         break;
+    case STATUSTYPE_IMA:
+        *result = '\0';
+        break;
     }
 }
 
 static struct target_type writecache_target = {
     .name           = "writecache",
-    .version        = {1, 5, 0},
+    .version        = {1, 6, 0},
     .module         = THIS_MODULE,
     .ctr            = writecache_ctr,
     .dtr            = writecache_dtr,
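After this change the writecache INFO status line carries 14 fields instead of 4, hence the target version bump to 1.6.0. A consumer of `dmsetup status` output could pick the new counters apart like this; the sample line is fabricated, and the field order is taken from the DMEMIT() format string in the hunk above:

    #include <stdio.h>

    int main(void)
    {
        /* error n_blocks freelist writeback reads read_hits writes ... */
        const char *line = "0 262144 1024 0 100 80 200 10 5 1 184 0 3 2";
        long error;
        unsigned long long n_blocks, freelist, writeback;
        unsigned long long reads, read_hits, writes;

        if (sscanf(line, "%ld %llu %llu %llu %llu %llu %llu",
                   &error, &n_blocks, &freelist, &writeback,
                   &reads, &read_hits, &writes) != 7) {
            fprintf(stderr, "unexpected status format\n");
            return 1;
        }
        printf("read hit rate: %.1f%%\n",
               reads ? 100.0 * read_hits / reads : 0.0);
        return 0;
    }

Appending new fields at the end (rather than reordering) is what keeps older parsers that only read the first four fields working.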
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 7e88df64d197..ae1bc48c0043 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1119,6 +1119,9 @@ static void dmz_status(struct dm_target *ti, status_type_t type,
             DMEMIT(" %s", buf);
         }
         break;
+    case STATUSTYPE_IMA:
+        *result = '\0';
+        break;
     }
     return;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2c5f9e585211..84e9145b1714 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -8,6 +8,7 @@
 #include "dm-core.h"
 #include "dm-rq.h"
 #include "dm-uevent.h"
+#include "dm-ima.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -261,9 +262,13 @@ static void (*_exits[])(void) = {
 static int __init dm_init(void)
 {
     const int count = ARRAY_SIZE(_inits);
-
     int r, i;
 
+#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
+    DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
+           " Duplicate IMA measurements will not be recorded in the IMA log.");
+#endif
+
     for (i = 0; i < count; i++) {
         r = _inits[i]();
         if (r)
@@ -271,8 +276,7 @@ static int __init dm_init(void)
     }
 
     return 0;
-
-      bad:
+bad:
     while (i--)
         _exits[i]();
 
@@ -1693,14 +1697,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
         spin_lock(&_minor_lock);
         md->disk->private_data = NULL;
         spin_unlock(&_minor_lock);
-        del_gendisk(md->disk);
-    }
-
-    if (md->queue)
+        if (dm_get_md_type(md) != DM_TYPE_NONE) {
+            dm_sysfs_exit(md);
+            del_gendisk(md->disk);
+        }
         dm_queue_destroy_keyslot_manager(md->queue);
-
-    if (md->disk)
         blk_cleanup_disk(md->disk);
+    }
 
     cleanup_srcu_struct(&md->io_barrier);
 
@@ -1792,7 +1795,6 @@ static struct mapped_device *alloc_dev(int minor)
             goto bad;
     }
 
-    add_disk_no_queue_reg(md->disk);
     format_dev_t(md->name, MKDEV(_major, minor));
 
     md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
@@ -1993,18 +1995,13 @@ static struct dm_table *__unbind(struct mapped_device *md)
  */
 int dm_create(int minor, struct mapped_device **result)
 {
-    int r;
     struct mapped_device *md;
 
     md = alloc_dev(minor);
     if (!md)
         return -ENXIO;
 
-    r = dm_sysfs_init(md);
-    if (r) {
-        free_dev(md);
-        return r;
-    }
+    dm_ima_reset_data(md);
 
     *result = md;
     return 0;
@@ -2056,9 +2053,9 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits);
  */
 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 {
-    int r;
+    enum dm_queue_mode type = dm_table_get_type(t);
     struct queue_limits limits;
-    enum dm_queue_mode type = dm_get_md_type(md);
+    int r;
 
     switch (type) {
     case DM_TYPE_REQUEST_BASED:
@@ -2086,8 +2083,14 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
     if (r)
         return r;
 
-    blk_register_queue(md->disk);
+    add_disk(md->disk);
 
+    r = dm_sysfs_init(md);
+    if (r) {
+        del_gendisk(md->disk);
+        return r;
+    }
+    md->type = type;
     return 0;
 }
@@ -2193,7 +2196,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
         DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
                dm_device_name(md), atomic_read(&md->holders));
 
-    dm_sysfs_exit(md);
     dm_table_destroy(__unbind(md));
     free_dev(md);
 }
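dm_init() keeps the classic forward-init/reverse-unwind shape: the _inits[] array runs in order, and on the first failure the bad: loop calls the matching _exits[] back to front, so only what actually succeeded gets torn down. A self-contained rendition of the same pattern; init_a/init_b are dummies, and init_b fails on purpose:

    #include <stdio.h>

    static int init_a(void) { puts("init a"); return 0; }
    static int init_b(void) { puts("init b"); return -1; } /* simulated failure */
    static void exit_a(void) { puts("exit a"); }
    static void exit_b(void) { puts("exit b"); }

    static int (*_inits[])(void) = { init_a, init_b };
    static void (*_exits[])(void) = { exit_a, exit_b };

    int main(void)
    {
        const int count = sizeof(_inits) / sizeof(_inits[0]);
        int i, r = 0;

        for (i = 0; i < count; i++) {
            r = _inits[i]();
            if (r)
                goto bad;
        }
        return 0;
    bad:
        while (i--)     /* unwind only what succeeded: prints "exit a" */
            _exits[i]();
        return r;
    }

The dm_setup_md_queue() hunk follows the same discipline on a smaller scale: add_disk() first, then dm_sysfs_init(), and on sysfs failure del_gendisk() undoes exactly the step that preceded it.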
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 832547cf038f..4c96c36bd01a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -764,9 +764,7 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
 static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
 {
-    int flags = rdev->bdev->bd_disk->flags;
-
-    if (!(flags & GENHD_FL_UP)) {
+    if (!disk_live(rdev->bdev->bd_disk)) {
         if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
             pr_warn("md: %s: %s array has a missing/failed member\n",
                 mdname(rdev->mddev), md_type);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 51f2547c2007..19598bd38939 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -474,8 +474,6 @@ static void raid1_end_write_request(struct bio *bio)
         /*
          * When the device is faulty, it is not necessary to
          * handle write error.
-         * For failfast, this is the only remaining device,
-         * We need to retry the write without FailFast.
          */
         if (!test_bit(Faulty, &rdev->flags))
             set_bit(R1BIO_WriteError, &r1_bio->state);
@@ -1331,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
     struct raid1_plug_cb *plug = NULL;
     int first_clone;
     int max_sectors;
+    bool write_behind = false;
 
     if (mddev_is_clustered(mddev) &&
          md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1383,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
     max_sectors = r1_bio->sectors;
     for (i = 0;  i < disks; i++) {
         struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+
+        /*
+         * The write-behind io is only attempted on drives marked as
+         * write-mostly, which means we could allocate write behind
+         * bio later.
+         */
+        if (rdev && test_bit(WriteMostly, &rdev->flags))
+            write_behind = true;
+
         if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
             atomic_inc(&rdev->nr_pending);
             blocked_rdev = rdev;
@@ -1456,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
         goto retry_write;
     }
 
+    /*
+     * When using a bitmap, we may call alloc_behind_master_bio below.
+     * alloc_behind_master_bio allocates a copy of the data payload a page
+     * at a time and thus needs a new bio that can fit the whole payload
+     * this bio in page sized chunks.
+     */
+    if (write_behind && bitmap)
+        max_sectors = min_t(int, max_sectors,
+                    BIO_MAX_VECS * (PAGE_SIZE >> 9));
     if (max_sectors < bio_sectors(bio)) {
         struct bio *split = bio_split(bio, max_sectors,
                           GFP_NOIO, &conf->bio_split);
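The raid1 clamp caps a write-behind request at what a single bio can carry once alloc_behind_master_bio() copies the payload one page at a time. With the common values BIO_MAX_VECS = 256 and 4 KiB pages (assumed here; both are config-dependent) that is 2048 sectors, i.e. 1 MiB. The arithmetic, spelled out:

    #include <stdio.h>

    /* Illustrative values only: BIO_MAX_VECS and PAGE_SIZE vary by
     * kernel version and architecture. */
    #define BIO_MAX_VECS    256
    #define PAGE_SIZE       4096UL

    int main(void)
    {
        int max_sectors = 4096;     /* pretend the bio spans 2 MiB */
        int limit = BIO_MAX_VECS * (PAGE_SIZE >> 9); /* sectors per bio */

        if (max_sectors > limit)
            max_sectors = limit;    /* the min_t() in the hunk above */

        printf("clamped to %d sectors (%d KiB)\n",
               max_sectors, max_sectors / 2);  /* 2048 sectors = 1024 KiB */
        return 0;
    }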
(%d users)", dm_device_name(md), atomic_read(&md->holders)); - dm_sysfs_exit(md); dm_table_destroy(__unbind(md)); free_dev(md); } diff --git a/drivers/md/md.h b/drivers/md/md.h index 832547cf038f..4c96c36bd01a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -764,9 +764,7 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type) { - int flags = rdev->bdev->bd_disk->flags; - - if (!(flags & GENHD_FL_UP)) { + if (!disk_live(rdev->bdev->bd_disk)) { if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) pr_warn("md: %s: %s array has a missing/failed member\n", mdname(rdev->mddev), md_type); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 51f2547c2007..19598bd38939 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -474,8 +474,6 @@ static void raid1_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); @@ -1331,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct raid1_plug_cb *plug = NULL; int first_clone; int max_sectors; + bool write_behind = false; if (mddev_is_clustered(mddev) && md_cluster_ops->area_resyncing(mddev, WRITE, @@ -1383,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); + + /* + * The write-behind io is only attempted on drives marked as + * write-mostly, which means we could allocate write behind + * bio later. + */ + if (rdev && test_bit(WriteMostly, &rdev->flags)) + write_behind = true; + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { atomic_inc(&rdev->nr_pending); blocked_rdev = rdev; @@ -1456,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, goto retry_write; } + /* + * When using a bitmap, we may call alloc_behind_master_bio below. + * alloc_behind_master_bio allocates a copy of the data payload a page + * at a time and thus needs a new bio that can fit the whole payload + * this bio in page sized chunks. + */ + if (write_behind && bitmap) + max_sectors = min_t(int, max_sectors, + BIO_MAX_VECS * (PAGE_SIZE >> 9)); if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, GFP_NOIO, &conf->bio_split); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 16977e8e075d..aa2636582841 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -471,12 +471,12 @@ static void raid10_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R10BIO_WriteError, &r10_bio->state); else { + /* Fail the request */ + set_bit(R10BIO_Degraded, &r10_bio->state); r10_bio->devs[slot].bio = NULL; to_put = bio; dec_rdev = 1; @@ -1712,6 +1712,11 @@ retry_discard: } else r10_bio->master_bio = (struct bio *)first_r10bio; + /* + * first select target devices under rcu_lock and + * inc refcount on their rdev. 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8436e4930ed..02ed53b20654 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2437,7 +2437,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
         conf->scribble_sectors >= new_sectors)
         return 0;
     mddev_suspend(conf->mddev);
-    get_online_cpus();
+    cpus_read_lock();
 
     for_each_present_cpu(cpu) {
         struct raid5_percpu *percpu;
@@ -2449,7 +2449,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
             break;
     }
 
-    put_online_cpus();
+    cpus_read_unlock();
     mddev_resume(conf->mddev);
 
     if (!err) {
         conf->scribble_disks = new_disks;
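cpus_read_lock()/cpus_read_unlock() are the current names for get_online_cpus()/put_online_cpus(); the semantics are unchanged: hold the CPU-hotplug read lock while every per-CPU scribble buffer is reallocated, so the CPU set cannot change mid-resize. A userspace analogue, assuming a pthread rwlock in place of the hotplug lock and a fixed NCPUS (build with -lpthread):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NCPUS 4

    static pthread_rwlock_t cpu_lock = PTHREAD_RWLOCK_INITIALIZER;
    static void *scribble[NCPUS];

    static int resize_scribble(size_t new_size)
    {
        int cpu, err = 0;

        pthread_rwlock_rdlock(&cpu_lock);   /* cpus_read_lock() */
        for (cpu = 0; cpu < NCPUS; cpu++) {
            void *p = realloc(scribble[cpu], new_size);
            if (!p) {
                err = -1;       /* keep old buffers on failure */
                break;
            }
            scribble[cpu] = p;
        }
        pthread_rwlock_unlock(&cpu_lock);   /* cpus_read_unlock() */
        return err;
    }

    int main(void)
    {
        if (resize_scribble(4096) == 0)
            printf("resized per-cpu buffers\n");
        return 0;
    }

A read lock suffices because resize_chunks() only needs the set of CPUs to stay stable; it is hotplug itself (the writer) that must be excluded, not other readers.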