diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 9 | ||||
-rw-r--r-- | drivers/md/md.c | 20 | ||||
-rw-r--r-- | drivers/md/md.h | 1 | ||||
-rw-r--r-- | drivers/md/raid0.c | 4 | ||||
-rw-r--r-- | drivers/md/raid1.c | 22 | ||||
-rw-r--r-- | drivers/md/raid10.c | 35 | ||||
-rw-r--r-- | drivers/md/raid5-cache.c | 12 | ||||
-rw-r--r-- | drivers/md/raid5-ppl.c | 171 | ||||
-rw-r--r-- | drivers/md/raid5.c | 16 |
9 files changed, 225 insertions, 65 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 40f3cd7eab0f..d2121637b4ab 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -625,7 +625,7 @@ re_read: err = read_sb_page(bitmap->mddev, offset, sb_page, - 0, sizeof(bitmap_super_t)); + 0, PAGE_SIZE); } if (err) return err; @@ -2058,6 +2058,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. @@ -2118,7 +2123,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (store.sb_page && bitmap->storage.sb_page) memcpy(page_address(store.sb_page), page_address(bitmap->storage.sb_page), - sizeof(bitmap_super_t)); + PAGE_SIZE); bitmap_file_unmap(&bitmap->storage); bitmap->storage = store; diff --git a/drivers/md/md.c b/drivers/md/md.c index 078c6f3b7e53..08fcaebc61bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1538,7 +1538,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ } else if (sb->bblog_offset != 0) rdev->badblocks.shift = 0; - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if ((le32_to_cpu(sb->feature_map) & + (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) { rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset); rdev->ppl.size = le16_to_cpu(sb->ppl.size); rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; @@ -1657,10 +1658,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if (le32_to_cpu(sb->feature_map) & + (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) { if (le32_to_cpu(sb->feature_map) & (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL)) return -EINVAL; + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) && + (le32_to_cpu(sb->feature_map) & + MD_FEATURE_MULTIPLE_PPLS)) + return -EINVAL; set_bit(MD_HAS_PPL, &mddev->flags); } } else if (mddev->pers == NULL) { @@ -1877,7 +1883,11 @@ retry: sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL); if (test_bit(MD_HAS_PPL, &mddev->flags)) { - sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); + if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags)) + sb->feature_map |= + cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS); + else + sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); sb->ppl.offset = cpu_to_le16(rdev->ppl.offset); sb->ppl.size = cpu_to_le16(rdev->ppl.size); } @@ -4285,6 +4295,8 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) if (err) export_rdev(rdev); mddev_unlock(mddev); + if (!err) + md_new_event(mddev); return err ? err : len; } @@ -7838,7 +7850,7 @@ static const struct file_operations md_seq_fops = { .open = md_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, .poll = mdstat_poll, }; diff --git a/drivers/md/md.h b/drivers/md/md.h index c0d436fb88f0..561d22b9a9a8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -236,6 +236,7 @@ enum mddev_flags { * never cause the array to become failed. */ MD_HAS_PPL, /* The raid array has PPL feature set */ + MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */ }; enum mddev_sb_flags { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 05a4521b832f..5a00fc118470 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -30,7 +30,8 @@ ((1L << MD_HAS_JOURNAL) | \ (1L << MD_JOURNAL_CLEAN) | \ (1L << MD_FAILFAST_SUPPORTED) |\ - (1L << MD_HAS_PPL)) + (1L << MD_HAS_PPL) | \ + (1L << MD_HAS_MULTIPLE_PPLS)) static int raid0_congested(struct mddev *mddev, int bits) { @@ -539,6 +540,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); + bio_clone_blkcg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index baf5e358d22a..f3f3e40dc9d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -48,7 +48,8 @@ #define UNSUPPORTED_MDDEV_FLAGS \ ((1L << MD_HAS_JOURNAL) | \ (1L << MD_JOURNAL_CLEAN) | \ - (1L << MD_HAS_PPL)) + (1L << MD_HAS_PPL) | \ + (1L << MD_HAS_MULTIPLE_PPLS)) /* * Number of guaranteed r1bios in case of extreme VM load: @@ -2560,6 +2561,23 @@ static int init_resync(struct r1conf *conf) return 0; } +static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) +{ + struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + struct resync_pages *rps; + struct bio *bio; + int i; + + for (i = conf->poolinfo->raid_disks; i--; ) { + bio = r1bio->bios[i]; + rps = bio->bi_private; + bio_reset(bio); + bio->bi_private = rps; + } + r1bio->master_bio = NULL; + return r1bio; +} + /* * perform a "sync" on one "block" * @@ -2645,7 +2663,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bitmap_cond_end_sync(mddev->bitmap, sector_nr, mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + r1_bio = raid1_alloc_init_r1buf(conf); raise_barrier(conf, sector_nr); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d1f948e371e0..374df5796649 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2796,6 +2796,35 @@ static int init_resync(struct r10conf *conf) return 0; } +static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) +{ + struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + struct rsync_pages *rp; + struct bio *bio; + int nalloc; + int i; + + if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery)) + nalloc = conf->copies; /* resync */ + else + nalloc = 2; /* recovery */ + + for (i = 0; i < nalloc; i++) { + bio = r10bio->devs[i].bio; + rp = bio->bi_private; + bio_reset(bio); + bio->bi_private = rp; + bio = r10bio->devs[i].repl_bio; + if (bio) { + rp = bio->bi_private; + bio_reset(bio); + bio->bi_private = rp; + } + } + return r10bio; +} + /* * perform a "sync" on one "block" * @@ -3025,7 +3054,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, atomic_inc(&mreplace->nr_pending); rcu_read_unlock(); - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; raise_barrier(conf, rb2 != NULL); atomic_set(&r10_bio->remaining, 0); @@ -3234,7 +3263,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } if (sync_blocks < max_sync) max_sync = sync_blocks; - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; r10_bio->mddev = mddev; @@ -4358,7 +4387,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, read_more: /* Now schedule reads for blocks from sector_nr to last */ - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; raise_barrier(conf, sectors_done != 0); atomic_set(&r10_bio->remaining, 0); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 5d7da1c700aa..0b7406ac8ce1 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2529,11 +2529,18 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp) static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) { - struct r5conf *conf = mddev->private; + struct r5conf *conf; int ret; - if (!conf->log) + ret = mddev_lock(mddev); + if (ret) + return ret; + + conf = mddev->private; + if (!conf || !conf->log) { + mddev_unlock(mddev); return 0; + } switch (conf->log->r5c_journal_mode) { case R5C_JOURNAL_MODE_WRITE_THROUGH: @@ -2551,6 +2558,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) default: ret = 0; } + mddev_unlock(mddev); return ret; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 1e237c40d6fa..cd026c88f7ef 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -87,6 +87,8 @@ * The current io_unit accepting new stripes is always at the end of the list. */ +#define PPL_SPACE_SIZE (128 * 1024) + struct ppl_conf { struct mddev *mddev; @@ -122,6 +124,10 @@ struct ppl_log { * always at the end of io_list */ spinlock_t io_list_lock; struct list_head io_list; /* all io_units of this log */ + + sector_t next_io_sector; + unsigned int entry_space; + bool use_multippl; }; #define PPL_IO_INLINE_BVECS 32 @@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) int i; sector_t data_sector = 0; int data_disks = 0; - unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE; struct r5conf *conf = sh->raid_conf; pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); /* check if current io_unit is full */ - if (io && (io->pp_size == entry_space || + if (io && (io->pp_size == log->entry_space || io->entries_count == PPL_HDR_MAX_ENTRIES)) { pr_debug("%s: add io_unit blocked by seq: %llu\n", __func__, io->seq); @@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) pplhdr->entries_count = cpu_to_le32(io->entries_count); pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); + /* Rewind the buffer if current PPL is larger then remaining space */ + if (log->use_multippl && + log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector < + (PPL_HEADER_SIZE + io->pp_size) >> 9) + log->next_io_sector = log->rdev->ppl.sector; + + bio->bi_end_io = ppl_log_endio; bio->bi_opf = REQ_OP_WRITE | REQ_FUA; bio_set_dev(bio, log->rdev->bdev); - bio->bi_iter.bi_sector = log->rdev->ppl.sector; + bio->bi_iter.bi_sector = log->next_io_sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); + pr_debug("%s: log->current_io_sector: %llu\n", __func__, + (unsigned long long)log->next_io_sector); + + if (log->use_multippl) + log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9; + list_for_each_entry(sh, &io->stripe_list, log_list) { /* entries for full stripe writes have no partial parity */ if (test_bit(STRIPE_FULL_WRITE, &sh->state)) @@ -813,12 +831,14 @@ out: return ret; } -static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr) +static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr, + sector_t offset) { struct ppl_conf *ppl_conf = log->ppl_conf; struct md_rdev *rdev = log->rdev; struct mddev *mddev = rdev->mddev; - sector_t ppl_sector = rdev->ppl.sector + (PPL_HEADER_SIZE >> 9); + sector_t ppl_sector = rdev->ppl.sector + offset + + (PPL_HEADER_SIZE >> 9); struct page *page; int i; int ret = 0; @@ -902,6 +922,9 @@ static int ppl_write_empty_header(struct ppl_log *log) return -ENOMEM; pplhdr = page_address(page); + /* zero out PPL space to avoid collision with old PPLs */ + blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector, + log->rdev->ppl.size, GFP_NOIO, 0); memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); pplhdr->signature = cpu_to_le32(log->ppl_conf->signature); pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); @@ -922,63 +945,110 @@ static int ppl_load_distributed(struct ppl_log *log) struct ppl_conf *ppl_conf = log->ppl_conf; struct md_rdev *rdev = log->rdev; struct mddev *mddev = rdev->mddev; - struct page *page; - struct ppl_header *pplhdr; + struct page *page, *page2, *tmp; + struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL; u32 crc, crc_stored; u32 signature; - int ret = 0; + int ret = 0, i; + sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0; pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk); - - /* read PPL header */ + /* read PPL headers, find the recent one */ page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; - if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, - PAGE_SIZE, page, REQ_OP_READ, 0, false)) { - md_error(mddev, rdev); - ret = -EIO; - goto out; + page2 = alloc_page(GFP_KERNEL); + if (!page2) { + __free_page(page); + return -ENOMEM; } - pplhdr = page_address(page); - /* check header validity */ - crc_stored = le32_to_cpu(pplhdr->checksum); - pplhdr->checksum = 0; - crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); + /* searching ppl area for latest ppl */ + while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) { + if (!sync_page_io(rdev, + rdev->ppl.sector - rdev->data_offset + + pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ, + 0, false)) { + md_error(mddev, rdev); + ret = -EIO; + /* if not able to read - don't recover any PPL */ + pplhdr = NULL; + break; + } + pplhdr = page_address(page); + + /* check header validity */ + crc_stored = le32_to_cpu(pplhdr->checksum); + pplhdr->checksum = 0; + crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); + + if (crc_stored != crc) { + pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n", + __func__, crc_stored, crc, + (unsigned long long)pplhdr_offset); + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } - if (crc_stored != crc) { - pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x\n", - __func__, crc_stored, crc); - ppl_conf->mismatch_count++; - goto out; - } + signature = le32_to_cpu(pplhdr->signature); - signature = le32_to_cpu(pplhdr->signature); + if (mddev->external) { + /* + * For external metadata the header signature is set and + * validated in userspace. + */ + ppl_conf->signature = signature; + } else if (ppl_conf->signature != signature) { + pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n", + __func__, signature, ppl_conf->signature, + (unsigned long long)pplhdr_offset); + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } - if (mddev->external) { - /* - * For external metadata the header signature is set and - * validated in userspace. - */ - ppl_conf->signature = signature; - } else if (ppl_conf->signature != signature) { - pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x\n", - __func__, signature, ppl_conf->signature); - ppl_conf->mismatch_count++; - goto out; + if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) > + le64_to_cpu(pplhdr->generation)) { + /* previous was newest */ + pplhdr = prev_pplhdr; + pplhdr_offset = prev_pplhdr_offset; + break; + } + + prev_pplhdr_offset = pplhdr_offset; + prev_pplhdr = pplhdr; + + tmp = page; + page = page2; + page2 = tmp; + + /* calculate next potential ppl offset */ + for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) + pplhdr_offset += + le32_to_cpu(pplhdr->entries[i].pp_size) >> 9; + pplhdr_offset += PPL_HEADER_SIZE >> 9; } + /* no valid ppl found */ + if (!pplhdr) + ppl_conf->mismatch_count++; + else + pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n", + __func__, (unsigned long long)pplhdr_offset, + le64_to_cpu(pplhdr->generation)); + /* attempt to recover from log if we are starting a dirty array */ - if (!mddev->pers && mddev->recovery_cp != MaxSector) - ret = ppl_recover(log, pplhdr); -out: + if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector) + ret = ppl_recover(log, pplhdr, pplhdr_offset); + /* write empty header if we are starting the array */ if (!ret && !mddev->pers) ret = ppl_write_empty_header(log); __free_page(page); + __free_page(page2); pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n", __func__, ret, ppl_conf->mismatch_count, @@ -1031,6 +1101,7 @@ static int ppl_load(struct ppl_conf *ppl_conf) static void __ppl_exit_log(struct ppl_conf *ppl_conf) { clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); + clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags); kfree(ppl_conf->child_logs); @@ -1099,6 +1170,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev) return 0; } +static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) +{ + if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + + PPL_HEADER_SIZE) * 2) { + log->use_multippl = true; + set_bit(MD_HAS_MULTIPLE_PPLS, + &log->ppl_conf->mddev->flags); + log->entry_space = PPL_SPACE_SIZE; + } else { + log->use_multippl = false; + log->entry_space = (log->rdev->ppl.size << 9) - + PPL_HEADER_SIZE; + } + log->next_io_sector = rdev->ppl.sector; +} + int ppl_init_log(struct r5conf *conf) { struct ppl_conf *ppl_conf; @@ -1196,6 +1283,7 @@ int ppl_init_log(struct r5conf *conf) q = bdev_get_queue(rdev->bdev); if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) need_cache_flush = true; + ppl_init_child_log(log, rdev); } } @@ -1261,6 +1349,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add) if (!ret) { log->rdev = rdev; ret = ppl_write_empty_header(log); + ppl_init_child_log(log, rdev); } } else { log->rdev = NULL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3ae8bbceb6c4..4188a4881148 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -494,7 +494,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) return 0; } -static void raid5_build_block(struct stripe_head *sh, int i, int previous); static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, struct stripe_head *sh); @@ -530,7 +529,7 @@ retry: WARN_ON(1); } dev->flags = 0; - raid5_build_block(sh, i, previous); + dev->sector = raid5_compute_blocknr(sh, i, previous); } if (read_seqcount_retry(&conf->gen_lock, seq)) goto retry; @@ -2662,14 +2661,6 @@ static void raid5_end_write_request(struct bio *bi) raid5_release_stripe(sh->batch_head); } -static void raid5_build_block(struct stripe_head *sh, int i, int previous) -{ - struct r5dev *dev = &sh->dev[i]; - - dev->flags = 0; - dev->sector = raid5_compute_blocknr(sh, i, previous); -} - static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; @@ -6237,6 +6228,10 @@ static void raid5_do_work(struct work_struct *work) spin_unlock_irq(&conf->device_lock); + flush_deferred_bios(conf); + + r5l_flush_stripe_to_raid(conf->log); + async_tx_issue_pending_all(); blk_finish_plug(&plug); @@ -7243,6 +7238,7 @@ static int raid5_run(struct mddev *mddev) pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", mdname(mddev)); clear_bit(MD_HAS_PPL, &mddev->flags); + clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); } if (mddev->private == NULL) |