diff options
author | Song Liu <song@kernel.org> | 2024-08-28 23:55:57 +0200 |
---|---|---|
committer | Song Liu <song@kernel.org> | 2024-08-28 23:55:57 +0200 |
commit | 7f67fdae3372c323ad02601ab65502c9e488a521 (patch) | |
tree | 8c2b9ccaa460b5a7fde2941faa0fb2d9fc091423 /drivers/md/md.c | |
parent | md: Remove flush handling (diff) | |
parent | md/md-bitmap: make in memory structure internal (diff) | |
download | linux-7f67fdae3372c323ad02601ab65502c9e488a521.tar.xz linux-7f67fdae3372c323ad02601ab65502c9e488a521.zip |
Merge branch 'md-6.12-bitmap' into md-6.12
From Yu Kuai (with minor changes by Song Liu):
The background is that currently bitmap is using a global spin_lock,
causing lock contention and huge IO performance degradation for all raid
levels.
However, it's impossible to implement a new lock free bitmap with
current situation that md-bitmap exposes the internal implementation
with lots of exported apis. Hence bitmap_operations is invented, to
describe bitmap core implementation, and a new bitmap can be introduced
with a new bitmap_operations, we only need to switch to the new one
during initialization.
And with this we can build bitmap as kernel module, but that's not
our concern for now.
This version was tested with mdadm tests and lvm2 tests. This set does
not introduce new errors in these tests.
* md-6.12-bitmap: (42 commits)
md/md-bitmap: make in memory structure internal
md/md-bitmap: merge md_bitmap_enabled() into bitmap_operations
md/md-bitmap: merge md_bitmap_wait_behind_writes() into bitmap_operations
md/md-bitmap: merge md_bitmap_free() into bitmap_operations
md/md-bitmap: merge md_bitmap_set_pages() into struct bitmap_operations
md/md-bitmap: merge md_bitmap_copy_from_slot() into struct bitmap_operation.
md/md-bitmap: merge get_bitmap_from_slot() into bitmap_operations
md/md-bitmap: merge md_bitmap_resize() into bitmap_operations
md/md-bitmap: pass in mddev directly for md_bitmap_resize()
md/md-bitmap: merge md_bitmap_daemon_work() into bitmap_operations
md/md-bitmap: merge bitmap_unplug() into bitmap_operations
md/md-bitmap: merge md_bitmap_unplug_async() into md_bitmap_unplug()
md/md-bitmap: merge md_bitmap_sync_with_cluster() into bitmap_operations
md/md-bitmap: merge md_bitmap_cond_end_sync() into bitmap_operations
md/md-bitmap: merge md_bitmap_close_sync() into bitmap_operations
md/md-bitmap: merge md_bitmap_end_sync() into bitmap_operations
md/md-bitmap: remove the parameter 'aborted' for md_bitmap_end_sync()
md/md-bitmap: merge md_bitmap_start_sync() into bitmap_operations
md/md-bitmap: merge md_bitmap_endwrite() into bitmap_operations
md/md-bitmap: merge md_bitmap_startwrite() into bitmap_operations
...
Signed-off-by: Song Liu <song@kernel.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 155 |
1 files changed, 100 insertions, 55 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index ce5d8be13821..414146111425 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -664,6 +664,7 @@ int mddev_init(struct mddev *mddev) mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->level = LEVEL_NONE; + mddev_set_bitmap_ops(mddev); INIT_WORK(&mddev->sync_work, md_start_sync); INIT_WORK(&mddev->del_work, mddev_delayed_delete); @@ -1264,6 +1265,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor return ret; } +static u64 md_bitmap_events_cleared(struct mddev *mddev) +{ + struct md_bitmap_stats stats; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (err) + return 0; + + return stats.events_cleared; +} + /* * validate_super for 0.90.0 * note: we are not using "freshest" for 0.9 superblock @@ -1356,7 +1369,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru /* if adding to array with a bitmap, then we can accept an * older device ... but not too old. */ - if (ev1 < mddev->bitmap->events_cleared) + if (ev1 < md_bitmap_events_cleared(mddev)) return 0; if (ev1 < mddev->events) set_bit(Bitmap_sync, &rdev->flags); @@ -1883,7 +1896,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc /* If adding to array with a bitmap, then we can accept an * older device, but not too old. */ - if (ev1 < mddev->bitmap->events_cleared) + if (ev1 < md_bitmap_events_cleared(mddev)) return 0; if (ev1 < mddev->events) set_bit(Bitmap_sync, &rdev->flags); @@ -2215,7 +2228,6 @@ super_1_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) { /* All necessary checks on new >= old have been done */ - struct bitmap *bitmap; if (new_offset >= rdev->data_offset) return 1; @@ -2232,11 +2244,18 @@ super_1_allow_new_offset(struct md_rdev *rdev, */ if (rdev->sb_start + (32+4)*2 > new_offset) return 0; - bitmap = rdev->mddev->bitmap; - if (bitmap && !rdev->mddev->bitmap_info.file && - rdev->sb_start + rdev->mddev->bitmap_info.offset + - bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset) - return 0; + + if (!rdev->mddev->bitmap_info.file) { + struct mddev *mddev = rdev->mddev; + struct md_bitmap_stats stats; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (!err && rdev->sb_start + mddev->bitmap_info.offset + + stats.file_pages * (PAGE_SIZE >> 9) > new_offset) + return 0; + } + if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) return 0; @@ -2712,7 +2731,7 @@ repeat: mddev_add_trace_msg(mddev, "md md_update_sb"); rewrite: - md_bitmap_update_sb(mddev->bitmap); + mddev->bitmap_ops->update_sb(mddev->bitmap); rdev_for_each(rdev, mddev) { if (rdev->sb_loaded != 1) continue; /* no noise on spare devices */ @@ -4572,17 +4591,23 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len) /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */ while (*buf) { chunk = end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; + if (buf == end) + break; + if (*end == '-') { /* range */ buf = end + 1; end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; + if (buf == end) + break; } - if (*end && !isspace(*end)) break; - md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); + + if (*end && !isspace(*end)) + break; + + mddev->bitmap_ops->dirty_bits(mddev, chunk, end_chunk); buf = skip_spaces(end); } - md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ + mddev->bitmap_ops->unplug(mddev, true); /* flush the bits to disk */ out: mddev_unlock(mddev); return len; @@ -6098,16 +6123,10 @@ int md_run(struct mddev *mddev) } if (err == 0 && pers->sync_request && (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { - struct bitmap *bitmap; - - bitmap = md_bitmap_create(mddev, -1); - if (IS_ERR(bitmap)) { - err = PTR_ERR(bitmap); + err = mddev->bitmap_ops->create(mddev, -1); + if (err) pr_warn("%s: failed to create bitmap (%d)\n", mdname(mddev), err); - } else - mddev->bitmap = bitmap; - } if (err) goto bitmap_abort; @@ -6177,7 +6196,7 @@ bitmap_abort: pers->free(mddev, mddev->private); mddev->private = NULL; module_put(pers->owner); - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); abort: bioset_exit(&mddev->io_clone_set); exit_sync_set: @@ -6196,9 +6215,10 @@ int do_md_run(struct mddev *mddev) err = md_run(mddev); if (err) goto out; - err = md_bitmap_load(mddev); + + err = mddev->bitmap_ops->load(mddev); if (err) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); goto out; } @@ -6342,7 +6362,8 @@ static void __md_stop_writes(struct mddev *mddev) mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } - md_bitmap_flush(mddev); + + mddev->bitmap_ops->flush(mddev); if (md_is_rdwr(mddev) && ((!mddev->in_sync && !mddev_is_clustered(mddev)) || @@ -6369,7 +6390,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { - md_bitmap_wait_behind_writes(mddev); + mddev->bitmap_ops->wait_behind_writes(mddev); if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); @@ -6384,7 +6405,8 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; - md_bitmap_destroy(mddev); + + mddev->bitmap_ops->destroy(mddev); mddev_detach(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; @@ -7162,22 +7184,19 @@ static int set_bitmap_file(struct mddev *mddev, int fd) err = 0; if (mddev->pers) { if (fd >= 0) { - struct bitmap *bitmap; + err = mddev->bitmap_ops->create(mddev, -1); + if (!err) + err = mddev->bitmap_ops->load(mddev); - bitmap = md_bitmap_create(mddev, -1); - if (!IS_ERR(bitmap)) { - mddev->bitmap = bitmap; - err = md_bitmap_load(mddev); - } else - err = PTR_ERR(bitmap); if (err) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); fd = -1; } } else if (fd < 0) { - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); } } + if (fd < 0) { struct file *f = mddev->bitmap_info.file; if (f) { @@ -7446,7 +7465,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) goto err; } if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { - struct bitmap *bitmap; /* add the bitmap */ if (mddev->bitmap) { rv = -EEXIST; @@ -7460,24 +7478,24 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; - bitmap = md_bitmap_create(mddev, -1); - if (!IS_ERR(bitmap)) { - mddev->bitmap = bitmap; - rv = md_bitmap_load(mddev); - } else - rv = PTR_ERR(bitmap); + rv = mddev->bitmap_ops->create(mddev, -1); + if (!rv) + rv = mddev->bitmap_ops->load(mddev); + if (rv) - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); } else { - /* remove the bitmap */ - if (!mddev->bitmap) { - rv = -ENOENT; + struct md_bitmap_stats stats; + + rv = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (rv) goto err; - } - if (mddev->bitmap->storage.file) { + + if (stats.file) { rv = -EINVAL; goto err; } + if (mddev->bitmap_info.nodes) { /* hold PW on all the bitmap lock */ if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) { @@ -7492,7 +7510,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) module_put(md_cluster_mod); mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY; } - md_bitmap_destroy(mddev); + mddev->bitmap_ops->destroy(mddev); mddev->bitmap_info.offset = 0; } } @@ -8262,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v) spin_unlock(&all_mddevs_lock); } +static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev) +{ + struct md_bitmap_stats stats; + unsigned long used_pages; + unsigned long chunk_kb; + int err; + + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (err) + return; + + chunk_kb = mddev->bitmap_info.chunksize >> 10; + used_pages = stats.pages - stats.missing_pages; + + seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], %lu%s chunk", + used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10), + chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize, + chunk_kb ? "KB" : "B"); + + if (stats.file) { + seq_puts(seq, ", file: "); + seq_file_path(seq, stats.file, " \t\n"); + } + + seq_putc(seq, '\n'); +} + static int md_seq_show(struct seq_file *seq, void *v) { struct mddev *mddev; @@ -8345,7 +8390,7 @@ static int md_seq_show(struct seq_file *seq, void *v) } else seq_printf(seq, "\n "); - md_bitmap_status(seq, mddev->bitmap); + md_bitmap_status(seq, mddev); seq_printf(seq, "\n"); } @@ -9397,7 +9442,7 @@ static void md_start_sync(struct work_struct *ws) * stored on all devices. So make sure all bitmap pages get written. */ if (spares) - md_bitmap_write_all(mddev->bitmap); + mddev->bitmap_ops->write_all(mddev); name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? "reshape" : "resync"; @@ -9485,7 +9530,7 @@ static void unregister_sync_thread(struct mddev *mddev) void md_check_recovery(struct mddev *mddev) { if (mddev->bitmap) - md_bitmap_daemon_work(mddev); + mddev->bitmap_ops->daemon_work(mddev); if (signal_pending(current)) { if (mddev->pers->sync_request && !mddev->external) { @@ -9856,7 +9901,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) if (ret) pr_info("md-cluster: resize failed\n"); else - md_bitmap_update_sb(mddev->bitmap); + mddev->bitmap_ops->update_sb(mddev->bitmap); } /* Check for change of roles in the active devices */ |