diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 21:11:41 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 21:11:41 +0100 |
commit | 7a771ceac771d009f7203c40b256b0608d7ea2f8 (patch) | |
tree | 940260bccb165f47669397515c00900629c01803 /drivers/md/persistent-data | |
parent | Merge tag 'mmc-v4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc (diff) | |
parent | dm: flush queued bios when process blocks to avoid deadlock (diff) | |
download | linux-7a771ceac771d009f7203c40b256b0608d7ea2f8.tar.xz linux-7a771ceac771d009f7203c40b256b0608d7ea2f8.zip |
Merge tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Fix dm-raid transient device failure processing and other smaller
tweaks.
- Add journal support to the DM raid target to close the 'write hole'
on raid 4/5/6.
- Fix dm-cache corruption, due to rounding bug, when cache exceeds 2TB.
- Add 'metadata2' feature to dm-cache to separate the dirty bitset out
from other cache metadata. This improves speed of shutting down a
large cache device (which implies writing out dirty bits).
- Fix a memory leak during dm-stats data structure destruction.
- Fix a DM multipath round-robin path selector performance regression
that was caused by less precise balancing across all paths.
- Lastly, introduce a DM core fix for a long-standing DM snapshot
deadlock that is rooted in the complexity of the device stack used in
conjunction with block core maintaining bios on current->bio_list to
manage recursion in generic_make_request(). A more comprehensive fix
to block core (and its hook in the cpu scheduler) would be wonderful
but this DM-specific fix is pragmatic considering how difficult it
has been to make progress on a generic fix.
* tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (22 commits)
dm: flush queued bios when process blocks to avoid deadlock
dm round robin: revert "use percpu 'repeat_count' and 'current_path'"
dm stats: fix a leaked s->histogram_boundaries array
dm space map metadata: constify dm_space_map structures
dm cache metadata: use cursor api in blocks_are_clean_separate_dirty()
dm persistent data: add cursor skip functions to the cursor APIs
dm cache metadata: use dm_bitset_new() to create the dirty bitset in format 2
dm bitset: add dm_bitset_new()
dm cache metadata: name the cache block that couldn't be loaded
dm cache metadata: add "metadata2" feature
dm cache metadata: use bitset cursor api to load discard bitset
dm bitset: introduce cursor api
dm btree: use GFP_NOFS in dm_btree_del()
dm space map common: memcpy the disk root to ensure it's arch aligned
dm block manager: add unlikely() annotations on dm_bufio error paths
dm cache: fix corruption seen when using cache > 2TB
dm raid: cleanup awkward branching in raid_message() option processing
dm raid: use mddev rather than rdev->mddev
dm raid: use read_disk_sb() throughout
dm raid: add raid4/5/6 journaling support
...
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r-- | drivers/md/persistent-data/dm-array.c | 21 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.h | 1 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-bitset.c | 146 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-bitset.h | 39 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 8 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 18 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.h | 1 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 16 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 4 |
9 files changed, 242 insertions, 12 deletions
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 7938cd21fa4c..185dc60360b5 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -976,6 +976,27 @@ int dm_array_cursor_next(struct dm_array_cursor *c) } EXPORT_SYMBOL_GPL(dm_array_cursor_next); +int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count) +{ + int r; + + do { + uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index; + + if (count < remaining) { + c->index += count; + return 0; + } + + count -= remaining; + r = dm_array_cursor_next(c); + + } while (!r); + + return r; +} +EXPORT_SYMBOL_GPL(dm_array_cursor_skip); + void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) { *value_le = element_at(c->info, c->ab, c->index); diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h index 27ee49a55473..d7d2d579c662 100644 --- a/drivers/md/persistent-data/dm-array.h +++ b/drivers/md/persistent-data/dm-array.h @@ -207,6 +207,7 @@ void dm_array_cursor_end(struct dm_array_cursor *c); uint32_t dm_array_cursor_index(struct dm_array_cursor *c); int dm_array_cursor_next(struct dm_array_cursor *c); +int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count); /* * value_le is only valid while the cursor points at the current value. diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c index 36f7cc2c7109..b7208d82e748 100644 --- a/drivers/md/persistent-data/dm-bitset.c +++ b/drivers/md/persistent-data/dm-bitset.c @@ -39,6 +39,48 @@ int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) } EXPORT_SYMBOL_GPL(dm_bitset_empty); +struct packer_context { + bit_value_fn fn; + unsigned nr_bits; + void *context; +}; + +static int pack_bits(uint32_t index, void *value, void *context) +{ + int r; + struct packer_context *p = context; + unsigned bit, nr = min(64u, p->nr_bits - (index * 64)); + uint64_t word = 0; + bool bv; + + for (bit = 0; bit < nr; bit++) { + r = p->fn(index * 64 + bit, &bv, p->context); + if (r) + return r; + + if (bv) + set_bit(bit, (unsigned long *) &word); + else + clear_bit(bit, (unsigned long *) &word); + } + + *((__le64 *) value) = cpu_to_le64(word); + + return 0; +} + +int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, + uint32_t size, bit_value_fn fn, void *context) +{ + struct packer_context p; + p.fn = fn; + p.nr_bits = size; + p.context = context; + + return dm_array_new(&info->array_info, root, dm_div_up(size, 64), pack_bits, &p); +} +EXPORT_SYMBOL_GPL(dm_bitset_new); + int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, uint32_t old_nr_entries, uint32_t new_nr_entries, bool default_value, dm_block_t *new_root) @@ -168,4 +210,108 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, } EXPORT_SYMBOL_GPL(dm_bitset_test_bit); +static int cursor_next_array_entry(struct dm_bitset_cursor *c) +{ + int r; + __le64 *value; + + r = dm_array_cursor_next(&c->cursor); + if (r) + return r; + + dm_array_cursor_get_value(&c->cursor, (void **) &value); + c->array_index++; + c->bit_index = 0; + c->current_bits = le64_to_cpu(*value); + return 0; +} + +int dm_bitset_cursor_begin(struct dm_disk_bitset *info, + dm_block_t root, uint32_t nr_entries, + struct dm_bitset_cursor *c) +{ + int r; + __le64 *value; + + if (!nr_entries) + return -ENODATA; + + c->info = info; + c->entries_remaining = nr_entries; + + r = dm_array_cursor_begin(&info->array_info, root, &c->cursor); + if (r) + return r; + + dm_array_cursor_get_value(&c->cursor, (void **) &value); + c->array_index = 0; + c->bit_index = 0; + c->current_bits = le64_to_cpu(*value); + + return r; +} +EXPORT_SYMBOL_GPL(dm_bitset_cursor_begin); + +void dm_bitset_cursor_end(struct dm_bitset_cursor *c) +{ + return dm_array_cursor_end(&c->cursor); +} +EXPORT_SYMBOL_GPL(dm_bitset_cursor_end); + +int dm_bitset_cursor_next(struct dm_bitset_cursor *c) +{ + int r = 0; + + if (!c->entries_remaining) + return -ENODATA; + + c->entries_remaining--; + if (++c->bit_index > 63) + r = cursor_next_array_entry(c); + + return r; +} +EXPORT_SYMBOL_GPL(dm_bitset_cursor_next); + +int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count) +{ + int r; + __le64 *value; + uint32_t nr_array_skip; + uint32_t remaining_in_word = 64 - c->bit_index; + + if (c->entries_remaining < count) + return -ENODATA; + + if (count < remaining_in_word) { + c->bit_index += count; + c->entries_remaining -= count; + return 0; + + } else { + c->entries_remaining -= remaining_in_word; + count -= remaining_in_word; + } + + nr_array_skip = (count / 64) + 1; + r = dm_array_cursor_skip(&c->cursor, nr_array_skip); + if (r) + return r; + + dm_array_cursor_get_value(&c->cursor, (void **) &value); + c->entries_remaining -= count; + c->array_index += nr_array_skip; + c->bit_index = count & 63; + c->current_bits = le64_to_cpu(*value); + + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_cursor_skip); + +bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c) +{ + return test_bit(c->bit_index, (unsigned long *) &c->current_bits); +} +EXPORT_SYMBOL_GPL(dm_bitset_cursor_get_value); + /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h index c2287d672ef5..df888da04ee1 100644 --- a/drivers/md/persistent-data/dm-bitset.h +++ b/drivers/md/persistent-data/dm-bitset.h @@ -93,6 +93,22 @@ void dm_disk_bitset_init(struct dm_transaction_manager *tm, int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); /* + * Creates a new bitset populated with values provided by a callback + * function. This is more efficient than creating an empty bitset, + * resizing, and then setting values since that process incurs a lot of + * copying. + * + * info - describes the array + * root - the root block of the array on disk + * size - the number of entries in the array + * fn - the callback + * context - passed to the callback + */ +typedef int (*bit_value_fn)(uint32_t index, bool *value, void *context); +int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, + uint32_t size, bit_value_fn fn, void *context); + +/* * Resize the bitset. * * info - describes the bitset @@ -161,6 +177,29 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, dm_block_t *new_root); +struct dm_bitset_cursor { + struct dm_disk_bitset *info; + struct dm_array_cursor cursor; + + uint32_t entries_remaining; + uint32_t array_index; + uint32_t bit_index; + uint64_t current_bits; +}; + +/* + * Make sure you've flush any dm_disk_bitset and updated the root before + * using this. + */ +int dm_bitset_cursor_begin(struct dm_disk_bitset *info, + dm_block_t root, uint32_t nr_entries, + struct dm_bitset_cursor *c); +void dm_bitset_cursor_end(struct dm_bitset_cursor *c); + +int dm_bitset_cursor_next(struct dm_bitset_cursor *c); +int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count); +bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c); + /*----------------------------------------------------------------*/ #endif /* _LINUX_DM_BITSET_H */ diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 758d90cc2733..0863905dee02 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, int r; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (IS_ERR(p)) + if (unlikely(IS_ERR(p))) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (IS_ERR(p)) + if (unlikely(IS_ERR(p))) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, int r; p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); - if (IS_ERR(p)) + if (unlikely(IS_ERR(p))) return PTR_ERR(p); if (unlikely(!p)) return -EWOULDBLOCK; @@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); - if (IS_ERR(p)) + if (unlikely(IS_ERR(p))) return PTR_ERR(p); memset(p, 0, dm_bm_block_size(bm)); diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 20a40329d84a..02e2ee0d8a00 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -272,7 +272,12 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) int r; struct del_stack *s; - s = kmalloc(sizeof(*s), GFP_NOIO); + /* + * dm_btree_del() is called via an ioctl, as such should be + * considered an FS op. We can't recurse back into the FS, so we + * allocate GFP_NOFS. + */ + s = kmalloc(sizeof(*s), GFP_NOFS); if (!s) return -ENOMEM; s->info = info; @@ -1139,6 +1144,17 @@ int dm_btree_cursor_next(struct dm_btree_cursor *c) } EXPORT_SYMBOL_GPL(dm_btree_cursor_next); +int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count) +{ + int r = 0; + + while (count-- && !r) + r = dm_btree_cursor_next(c); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_cursor_skip); + int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) { if (c->depth) { diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index db9bd26adf31..3dc5bb1a4748 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -209,6 +209,7 @@ int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, bool prefetch_leaves, struct dm_btree_cursor *c); void dm_btree_cursor_end(struct dm_btree_cursor *c); int dm_btree_cursor_next(struct dm_btree_cursor *c); +int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); #endif /* _LINUX_DM_BTREE_H */ diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 4c28608a0c94..829b4ce057d8 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -626,13 +626,19 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, void *root_le, size_t len) { int r; - struct disk_sm_root *smr = root_le; + struct disk_sm_root smr; if (len < sizeof(struct disk_sm_root)) { DMERR("sm_metadata root too small"); return -ENOMEM; } + /* + * We don't know the alignment of the root_le buffer, so need to + * copy into a new structure. + */ + memcpy(&smr, root_le, sizeof(smr)); + r = sm_ll_init(ll, tm); if (r < 0) return r; @@ -644,10 +650,10 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, ll->max_entries = metadata_ll_max_entries; ll->commit = metadata_ll_commit; - ll->nr_blocks = le64_to_cpu(smr->nr_blocks); - ll->nr_allocated = le64_to_cpu(smr->nr_allocated); - ll->bitmap_root = le64_to_cpu(smr->bitmap_root); - ll->ref_count_root = le64_to_cpu(smr->ref_count_root); + ll->nr_blocks = le64_to_cpu(smr.nr_blocks); + ll->nr_allocated = le64_to_cpu(smr.nr_allocated); + ll->bitmap_root = le64_to_cpu(smr.bitmap_root); + ll->ref_count_root = le64_to_cpu(smr.ref_count_root); return ll->open_index(ll); } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 20557e2c60c6..4aed69d9dd17 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -544,7 +544,7 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); -static struct dm_space_map ops = { +static const struct dm_space_map ops = { .destroy = sm_metadata_destroy, .extend = sm_metadata_extend, .get_nr_blocks = sm_metadata_get_nr_blocks, @@ -671,7 +671,7 @@ static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, return -EINVAL; } -static struct dm_space_map bootstrap_ops = { +static const struct dm_space_map bootstrap_ops = { .destroy = sm_bootstrap_destroy, .extend = sm_bootstrap_extend, .get_nr_blocks = sm_bootstrap_get_nr_blocks, |