diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 320 |
1 files changed, 257 insertions, 63 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ef0fd4830803..39343479ac2a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -448,13 +448,74 @@ out: return sh; } -static void shrink_buffers(struct stripe_head *sh) +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE +static void free_stripe_pages(struct stripe_head *sh) { + int i; struct page *p; + + /* Have not allocate page pool */ + if (!sh->pages) + return; + + for (i = 0; i < sh->nr_pages; i++) { + p = sh->pages[i]; + if (p) + put_page(p); + sh->pages[i] = NULL; + } +} + +static int alloc_stripe_pages(struct stripe_head *sh, gfp_t gfp) +{ + int i; + struct page *p; + + for (i = 0; i < sh->nr_pages; i++) { + /* The page have allocated. */ + if (sh->pages[i]) + continue; + + p = alloc_page(gfp); + if (!p) { + free_stripe_pages(sh); + return -ENOMEM; + } + sh->pages[i] = p; + } + return 0; +} + +static int +init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) +{ + int nr_pages, cnt; + + if (sh->pages) + return 0; + + /* Each of the sh->dev[i] need one conf->stripe_size */ + cnt = PAGE_SIZE / conf->stripe_size; + nr_pages = (disks + cnt - 1) / cnt; + + sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (!sh->pages) + return -ENOMEM; + sh->nr_pages = nr_pages; + sh->stripes_per_page = cnt; + return 0; +} +#endif + +static void shrink_buffers(struct stripe_head *sh) +{ int i; int num = sh->raid_conf->pool_size; +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE for (i = 0; i < num ; i++) { + struct page *p; + WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); p = sh->dev[i].page; if (!p) @@ -462,6 +523,11 @@ static void shrink_buffers(struct stripe_head *sh) sh->dev[i].page = NULL; put_page(p); } +#else + for (i = 0; i < num; i++) + sh->dev[i].page = NULL; + free_stripe_pages(sh); /* Free pages */ +#endif } static int grow_buffers(struct stripe_head *sh, gfp_t gfp) @@ -469,6 +535,7 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) int i; int num = sh->raid_conf->pool_size; +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE for (i = 0; i < num; i++) { struct page *page; @@ -477,8 +544,18 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) } sh->dev[i].page = page; sh->dev[i].orig_page = page; + sh->dev[i].offset = 0; } +#else + if (alloc_stripe_pages(sh, gfp)) + return -ENOMEM; + for (i = 0; i < num; i++) { + sh->dev[i].page = raid5_get_dev_page(sh, i); + sh->dev[i].orig_page = sh->dev[i].page; + sh->dev[i].offset = raid5_get_page_offset(sh, i); + } +#endif return 0; } @@ -1130,7 +1207,7 @@ again: sh->dev[i].vec.bv_page = sh->dev[i].page; bi->bi_vcnt = 1; bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); - bi->bi_io_vec[0].bv_offset = 0; + bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); bi->bi_write_hint = sh->dev[i].write_hint; if (!rrdev) @@ -1184,7 +1261,7 @@ again: sh->dev[i].rvec.bv_page = sh->dev[i].page; rbi->bi_vcnt = 1; rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); - rbi->bi_io_vec[0].bv_offset = 0; + rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); rbi->bi_write_hint = sh->dev[i].write_hint; sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; @@ -1226,7 +1303,7 @@ again: static struct dma_async_tx_descriptor * async_copy_data(int frombio, struct bio *bio, struct page **page, - sector_t sector, struct dma_async_tx_descriptor *tx, + unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, struct stripe_head *sh, int no_skipcopy) { struct bio_vec bvl; @@ -1272,11 +1349,11 @@ async_copy_data(int frombio, struct bio *bio, struct page **page, !no_skipcopy) *page = bio_page; else - tx = async_memcpy(*page, bio_page, page_offset, + tx = async_memcpy(*page, bio_page, page_offset + poff, b_offset, clen, &submit); } else tx = async_memcpy(bio_page, *page, b_offset, - page_offset, clen, &submit); + page_offset + poff, clen, &submit); } /* chain the operations */ submit.depend_tx = tx; @@ -1349,6 +1426,7 @@ static void ops_run_biofill(struct stripe_head *sh) while (rbi && rbi->bi_iter.bi_sector < dev->sector + RAID5_STRIPE_SECTORS(conf)) { tx = async_copy_data(0, rbi, &dev->page, + dev->offset, dev->sector, tx, sh, 0); rbi = r5_next_bio(conf, rbi, dev->sector); } @@ -1404,14 +1482,25 @@ static addr_conv_t *to_addr_conv(struct stripe_head *sh, return (void *) (to_addr_page(percpu, i) + sh->disks + 2); } +/* + * Return a pointer to record offset address. + */ +static unsigned int * +to_addr_offs(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); +} + static struct dma_async_tx_descriptor * ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) { int disks = sh->disks; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); int target = sh->ops.target; struct r5dev *tgt = &sh->dev[target]; struct page *xor_dest = tgt->page; + unsigned int off_dest = tgt->offset; int count = 0; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; @@ -1423,19 +1512,22 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) __func__, (unsigned long long)sh->sector, target); BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - for (i = disks; i--; ) - if (i != target) + for (i = disks; i--; ) { + if (i != target) { + off_srcs[count] = sh->dev[i].offset; xor_srcs[count++] = sh->dev[i].page; + } + } atomic_inc(&sh->count); init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1443,6 +1535,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) /* set_syndrome_sources - populate source buffers for gen_syndrome * @srcs - (struct page *) array of size sh->disks + * @offs - (unsigned int) array of offset for each page * @sh - stripe_head to parse * * Populates srcs in proper layout order for the stripe and returns the @@ -1451,6 +1544,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) * is recorded in srcs[count+1]]. */ static int set_syndrome_sources(struct page **srcs, + unsigned int *offs, struct stripe_head *sh, int srctype) { @@ -1481,6 +1575,12 @@ static int set_syndrome_sources(struct page **srcs, srcs[slot] = sh->dev[i].orig_page; else srcs[slot] = sh->dev[i].page; + /* + * For R5_InJournal, PAGE_SIZE must be 4KB and will + * not shared page. In that case, dev[i].offset + * is 0. + */ + offs[slot] = sh->dev[i].offset; } i = raid6_next_disk(i, disks); } while (i != d0_idx); @@ -1493,12 +1593,14 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) { int disks = sh->disks; struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); int target; int qd_idx = sh->qd_idx; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; struct r5dev *tgt; struct page *dest; + unsigned int dest_off; int i; int count; @@ -1517,17 +1619,18 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) tgt = &sh->dev[target]; BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); dest = tgt->page; + dest_off = tgt->offset; atomic_inc(&sh->count); if (target == qd_idx) { - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); blocks[count] = NULL; /* regenerating p is not necessary */ BUG_ON(blocks[count+1] != dest); /* q should already be set */ init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } else { /* Compute any data- or p-drive using XOR */ @@ -1535,13 +1638,14 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) for (i = disks; i-- ; ) { if (i == target || i == qd_idx) continue; + offs[count] = sh->dev[i].offset; blocks[count++] = sh->dev[i].page; } init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, + tx = async_xor_offs(dest, dest_off, blocks, offs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } @@ -1561,6 +1665,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) struct r5dev *tgt2 = &sh->dev[target2]; struct dma_async_tx_descriptor *tx; struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); struct async_submit_ctl submit; BUG_ON(sh->batch_head); @@ -1573,13 +1678,16 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) /* we need to open-code set_syndrome_sources to handle the * slot number conversion for 'faila' and 'failb' */ - for (i = 0; i < disks ; i++) + for (i = 0; i < disks ; i++) { + offs[i] = 0; blocks[i] = NULL; + } count = 0; i = d0_idx; do { int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + offs[slot] = sh->dev[i].offset; blocks[slot] = sh->dev[i].page; if (i == target) @@ -1604,11 +1712,12 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - return async_gen_syndrome(blocks, 0, syndrome_disks+2, + return async_gen_syndrome(blocks, offs, syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } else { struct page *dest; + unsigned int dest_off; int data_target; int qd_idx = sh->qd_idx; @@ -1622,22 +1731,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) for (i = disks; i-- ; ) { if (i == data_target || i == qd_idx) continue; + offs[count] = sh->dev[i].offset; blocks[count++] = sh->dev[i].page; } dest = sh->dev[data_target].page; + dest_off = sh->dev[data_target].offset; init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, + tx = async_xor_offs(dest, dest_off, blocks, offs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); init_async_submit(&submit, ASYNC_TX_FENCE, tx, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - return async_gen_syndrome(blocks, 0, count+2, + return async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } @@ -1650,13 +1761,13 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) return async_raid6_datap_recov(syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), faila, - blocks, &submit); + blocks, offs, &submit); } else { /* We're missing D+D. */ return async_raid6_2data_recov(syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), faila, failb, - blocks, &submit); + blocks, offs, &submit); } } } @@ -1682,10 +1793,12 @@ ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, { int disks = sh->disks; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); int count = 0, pd_idx = sh->pd_idx, i; struct async_submit_ctl submit; /* existing parity data subtracted */ + unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; BUG_ON(sh->batch_head); @@ -1695,15 +1808,22 @@ ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; /* Only process blocks that are known to be uptodate */ - if (test_bit(R5_InJournal, &dev->flags)) + if (test_bit(R5_InJournal, &dev->flags)) { + /* + * For this case, PAGE_SIZE must be equal to 4KB and + * page offset is zero. + */ + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->orig_page; - else if (test_bit(R5_Wantdrain, &dev->flags)) + } else if (test_bit(R5_Wantdrain, &dev->flags)) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1714,17 +1834,18 @@ ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu, struct dma_async_tx_descriptor *tx) { struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); int count; struct async_submit_ctl submit; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN); init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1775,6 +1896,7 @@ again: set_bit(R5_Discard, &dev->flags); else { tx = async_copy_data(1, wbi, &dev->page, + dev->offset, dev->sector, tx, sh, r5c_is_writeback(conf->log)); if (dev->page != dev->orig_page && @@ -1854,9 +1976,11 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, { int disks = sh->disks; struct page **xor_srcs; + unsigned int *off_srcs; struct async_submit_ctl submit; int count, pd_idx = sh->pd_idx, i; struct page *xor_dest; + unsigned int off_dest; int prexor = 0; unsigned long flags; int j = 0; @@ -1881,24 +2005,31 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, again: count = 0; xor_srcs = to_addr_page(percpu, j); + off_srcs = to_addr_offs(sh, percpu); /* check if prexor is active which means only process blocks * that are part of a read-modify-write (written) */ if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { prexor = 1; + off_dest = off_srcs[count] = sh->dev[pd_idx].offset; xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (head_sh->dev[i].written || - test_bit(R5_InJournal, &head_sh->dev[i].flags)) + test_bit(R5_InJournal, &head_sh->dev[i].flags)) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } } else { xor_dest = sh->dev[pd_idx].page; + off_dest = sh->dev[pd_idx].offset; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (i != pd_idx) + if (i != pd_idx) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } } @@ -1924,10 +2055,10 @@ again: } if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; @@ -1943,6 +2074,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, { struct async_submit_ctl submit; struct page **blocks; + unsigned int *offs; int count, i, j = 0; struct stripe_head *head_sh = sh; int last_stripe; @@ -1967,6 +2099,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, again: blocks = to_addr_page(percpu, j); + offs = to_addr_offs(sh, percpu); if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { synflags = SYNDROME_SRC_WRITTEN; @@ -1976,7 +2109,7 @@ again: txflags = ASYNC_TX_ACK; } - count = set_syndrome_sources(blocks, sh, synflags); + count = set_syndrome_sources(blocks, offs, sh, synflags); last_stripe = !head_sh->batch_head || list_first_entry(&sh->batch_list, struct stripe_head, batch_list) == head_sh; @@ -1988,7 +2121,7 @@ again: } else init_async_submit(&submit, 0, tx, NULL, NULL, to_addr_conv(sh, percpu, j)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; @@ -2016,7 +2149,9 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; struct page *xor_dest; + unsigned int off_dest; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; int count; @@ -2028,16 +2163,19 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) BUG_ON(sh->batch_head); count = 0; xor_dest = sh->dev[pd_idx].page; + off_dest = sh->dev[pd_idx].offset; + off_srcs[count] = off_dest; xor_srcs[count++] = xor_dest; for (i = disks; i--; ) { if (i == pd_idx || i == qd_idx) continue; + off_srcs[count] = sh->dev[i].offset; xor_srcs[count++] = sh->dev[i].page; } init_async_submit(&submit, 0, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor_val(xor_dest, xor_srcs, 0, count, + tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &sh->ops.zero_sum_result, &submit); @@ -2049,6 +2187,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) { struct page **srcs = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); struct async_submit_ctl submit; int count; @@ -2056,16 +2195,16 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu (unsigned long long)sh->sector, checkp); BUG_ON(sh->batch_head); - count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL); if (!checkp) srcs[count] = NULL; atomic_inc(&sh->count); init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, sh, to_addr_conv(sh, percpu, 0)); - async_syndrome_val(srcs, 0, count+2, + async_syndrome_val(srcs, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), - &sh->ops.zero_sum_result, percpu->spare_page, &submit); + &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); } static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2142,6 +2281,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) { +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + kfree(sh->pages); +#endif if (sh->ppl_page) __free_page(sh->ppl_page); kmem_cache_free(sc, sh); @@ -2175,9 +2317,15 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, sh->ppl_page = alloc_page(gfp); if (!sh->ppl_page) { free_stripe(sc, sh); - sh = NULL; + return NULL; } } +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + if (init_stripe_shared_pages(sh, conf, disks)) { + free_stripe(sc, sh); + return NULL; + } +#endif } return sh; } @@ -2253,8 +2401,9 @@ static int scribble_alloc(struct raid5_percpu *percpu, int num, int cnt) { size_t obj_size = - sizeof(struct page *) * (num+2) + - sizeof(addr_conv_t) * (num+2); + sizeof(struct page *) * (num + 2) + + sizeof(addr_conv_t) * (num + 2) + + sizeof(unsigned int) * (num + 2); void *scribble; /* @@ -2386,9 +2535,16 @@ static int resize_stripes(struct r5conf *conf, int newsize) osh = get_free_stripe(conf, hash); unlock_device_hash_lock(conf, hash); +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + for (i = 0; i < osh->nr_pages; i++) { + nsh->pages[i] = osh->pages[i]; + osh->pages[i] = NULL; + } +#endif for(i=0; i<conf->pool_size; i++) { nsh->dev[i].page = osh->dev[i].page; nsh->dev[i].orig_page = osh->dev[i].page; + nsh->dev[i].offset = osh->dev[i].offset; } nsh->hash_lock_index = hash; free_stripe(conf->slab_cache, osh); @@ -2429,8 +2585,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; - mutex_unlock(&conf->cache_size_mutex); - conf->slab_cache = sc; conf->active_name = 1-conf->active_name; @@ -2439,20 +2593,41 @@ static int resize_stripes(struct r5conf *conf, int newsize) nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del_init(&nsh->lru); +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + for (i = 0; i < nsh->nr_pages; i++) { + if (nsh->pages[i]) + continue; + nsh->pages[i] = alloc_page(GFP_NOIO); + if (!nsh->pages[i]) + err = -ENOMEM; + } + + for (i = conf->raid_disks; i < newsize; i++) { + if (nsh->dev[i].page) + continue; + nsh->dev[i].page = raid5_get_dev_page(nsh, i); + nsh->dev[i].orig_page = nsh->dev[i].page; + nsh->dev[i].offset = raid5_get_page_offset(nsh, i); + } +#else for (i=conf->raid_disks; i < newsize; i++) if (nsh->dev[i].page == NULL) { struct page *p = alloc_page(GFP_NOIO); nsh->dev[i].page = p; nsh->dev[i].orig_page = p; + nsh->dev[i].offset = 0; if (!p) err = -ENOMEM; } +#endif raid5_release_stripe(nsh); } /* critical section pass, GFP_NOIO no longer needed */ if (!err) conf->pool_size = newsize; + mutex_unlock(&conf->cache_size_mutex); + return err; } @@ -4083,7 +4258,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, break; } dev = &sh->dev[s->failed_num[0]]; - /* fall through */ + fallthrough; case check_state_compute_result: sh->check_state = check_state_idle; if (!dev) @@ -4214,7 +4389,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, /* we have 2-disk failure */ BUG_ON(s->failed != 2); - /* fall through */ + fallthrough; case check_state_compute_result: sh->check_state = check_state_idle; @@ -4369,7 +4544,8 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) /* place all the copies on one channel */ init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, RAID5_STRIPE_SIZE(conf), + sh->dev[i].page, sh2->dev[dd_idx].offset, + sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); @@ -6506,6 +6682,7 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) struct r5conf *conf; unsigned long new; int err; + int size; if (len >= PAGE_SIZE) return -EINVAL; @@ -6514,9 +6691,12 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) /* * The value should not be bigger than PAGE_SIZE. It requires to - * be multiple of DEFAULT_STRIPE_SIZE. + * be multiple of DEFAULT_STRIPE_SIZE and the value should be power + * of two. */ - if (new % DEFAULT_STRIPE_SIZE != 0 || new > PAGE_SIZE || new == 0) + if (new % DEFAULT_STRIPE_SIZE != 0 || + new > PAGE_SIZE || new == 0 || + new != roundup_pow_of_two(new)) return -EINVAL; err = mddev_lock(mddev); @@ -6535,10 +6715,29 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) pr_debug("md/raid: change stripe_size from %lu to %lu\n", conf->stripe_size, new); + if (mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + mddev->reshape_position != MaxSector || + mddev->sysfs_active) { + err = -EBUSY; + goto out_unlock; + } + mddev_suspend(mddev); + mutex_lock(&conf->cache_size_mutex); + size = conf->max_nr_stripes; + + shrink_stripes(conf); + conf->stripe_size = new; conf->stripe_shift = ilog2(new) - 9; conf->stripe_sectors = new >> 9; + if (grow_stripes(conf, size)) { + pr_warn("md/raid:%s: couldn't allocate buffers\n", + mdname(mddev)); + err = -ENOMEM; + } + mutex_unlock(&conf->cache_size_mutex); mddev_resume(mddev); out_unlock: @@ -6635,14 +6834,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { + struct request_queue *q = mddev->queue; + mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - mddev->queue->backing_dev_info->capabilities &= - ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); mddev_resume(mddev); } mddev_unlock(mddev); @@ -7229,6 +7428,12 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded return 0; } +static void raid5_set_io_opt(struct r5conf *conf) +{ + blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * + (conf->raid_disks - conf->max_degraded)); +} + static int raid5_run(struct mddev *mddev) { struct r5conf *conf; @@ -7513,13 +7718,10 @@ static int raid5_run(struct mddev *mddev) int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks - conf->max_degraded)); + raid5_set_io_opt(conf); mddev->queue->limits.raid_partial_stripes_expensive = 1; /* * We can only discard a whole stripe. It doesn't make sense to @@ -8103,16 +8305,8 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * ((conf->chunk_sectors << 9) - / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; - } + if (conf->mddev->queue) + raid5_set_io_opt(conf); } } |