diff options
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r-- | fs/btrfs/raid56.c | 169 |
1 files changed, 137 insertions, 32 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5ef4fbb49df2..2d90a6b5eb00 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -21,6 +21,7 @@
 #include "raid56.h"
 #include "async-thread.h"
 #include "file-item.h"
+#include "btrfs_inode.h"
 
 /* set when additional merges to this rbio are not allowed */
 #define RBIO_RMW_LOCKED_BIT	1
@@ -1433,14 +1434,56 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi
 			     bio_size >> rbio->bioc->fs_info->sectorsize_bits);
 }
 
+/* Verify the data sectors at read time. */
+static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
+				    struct bio *bio)
+{
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	int total_sector_nr = get_bio_sector_nr(rbio, bio);
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	/* No data csum for the whole stripe, no need to verify. */
+	if (!rbio->csum_bitmap || !rbio->csum_buf)
+		return;
+
+	/* P/Q stripes, they have no data csum to verify against. */
+	if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
+		return;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		int bv_offset;
+
+		for (bv_offset = bvec->bv_offset;
+		     bv_offset < bvec->bv_offset + bvec->bv_len;
+		     bv_offset += fs_info->sectorsize, total_sector_nr++) {
+			u8 csum_buf[BTRFS_CSUM_SIZE];
+			u8 *expected_csum = rbio->csum_buf +
+					    total_sector_nr * fs_info->csum_size;
+			int ret;
+
+			/* No csum for this sector, skip to the next sector. */
+			if (!test_bit(total_sector_nr, rbio->csum_bitmap))
+				continue;
+
+			ret = btrfs_check_sector_csum(fs_info, bvec->bv_page,
+				bv_offset, csum_buf, expected_csum);
+			if (ret < 0)
+				set_bit(total_sector_nr, rbio->error_bitmap);
+		}
+	}
+}
+
 static void raid_wait_read_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_status)
+	if (bio->bi_status) {
 		rbio_update_error_bitmap(rbio, bio);
-	else
+	} else {
 		set_bio_pages_uptodate(rbio, bio);
+		verify_bio_data_sectors(rbio, bio);
+	}
 
 	bio_put(bio);
 	if (atomic_dec_and_test(&rbio->stripes_pending))
@@ -1469,37 +1512,25 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
 static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
 				  struct bio_list *bio_list)
 {
-	const int nr_data_sectors = rbio->stripe_nsectors * rbio->nr_data;
 	struct bio *bio;
 	int total_sector_nr;
 	int ret = 0;
 
 	ASSERT(bio_list_size(bio_list) == 0);
 
-	/* Build a list of bios to read all the missing data sectors. */
-	for (total_sector_nr = 0; total_sector_nr < nr_data_sectors;
+	/*
+	 * Build a list of bios to read all sectors (including data and P/Q).
+	 *
+	 * This behavior is to compensate for the later csum verification and
+	 * recovery.
+	 */
+	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
 	     total_sector_nr++) {
 		struct sector_ptr *sector;
 		int stripe = total_sector_nr / rbio->stripe_nsectors;
 		int sectornr = total_sector_nr % rbio->stripe_nsectors;
 
-		/*
-		 * We want to find all the sectors missing from the rbio and
-		 * read them from the disk.  If sector_in_rbio() finds a page
-		 * in the bio list we don't need to read it off the stripe.
-		 */
-		sector = sector_in_rbio(rbio, stripe, sectornr, 1);
-		if (sector)
-			continue;
-
 		sector = rbio_stripe_sector(rbio, stripe, sectornr);
-		/*
-		 * The bio cache may have handed us an uptodate page.  If so,
-		 * use it.
-		 */
-		if (sector->uptodate)
-			continue;
-
 		ret = rbio_add_io_sector(rbio, bio_list, sector,
 			       stripe, sectornr, REQ_OP_READ);
 		if (ret)
@@ -1670,6 +1701,42 @@ fail:
 	bio_endio(bio);
 }
 
+static int verify_one_sector(struct btrfs_raid_bio *rbio,
+			     int stripe_nr, int sector_nr)
+{
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	struct sector_ptr *sector;
+	u8 csum_buf[BTRFS_CSUM_SIZE];
+	u8 *csum_expected;
+	int ret;
+
+	if (!rbio->csum_bitmap || !rbio->csum_buf)
+		return 0;
+
+	/* No way to verify P/Q as they are not covered by data csum. */
+	if (stripe_nr >= rbio->nr_data)
+		return 0;
+	/*
+	 * If we're rebuilding a read, we have to use pages from the
+	 * bio list if possible.
+	 */
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+		sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+	} else {
+		sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+	}
+
+	ASSERT(sector->page);
+
+	csum_expected = rbio->csum_buf +
+			(stripe_nr * rbio->stripe_nsectors + sector_nr) *
+			fs_info->csum_size;
+	ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff,
+				      csum_buf, csum_expected);
+	return ret;
+}
+
 /*
  * Recover a vertical stripe specified by @sector_nr.
  * @*pointers are the pre-allocated pointers by the caller, so we don't
@@ -1685,6 +1752,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
 	int faila;
 	int failb;
 	int stripe_nr;
+	int ret = 0;
 
 	/*
 	 * Now we just use bitmap to mark the horizontal stripes in
@@ -1805,12 +1873,23 @@ pstripe:
 	 * uptodate.
 	 * Especially if we determine to cache the rbio, we need to
 	 * have at least all data sectors uptodate.
+	 *
+	 * If possible, also check if the repaired sector matches its data
+	 * checksum.
 	 */
 	if (faila >= 0) {
+		ret = verify_one_sector(rbio, faila, sector_nr);
+		if (ret < 0)
+			goto cleanup;
+
 		sector = rbio_stripe_sector(rbio, faila, sector_nr);
 		sector->uptodate = 1;
 	}
 	if (failb >= 0) {
+		ret = verify_one_sector(rbio, failb, sector_nr);
+		if (ret < 0)
+			goto cleanup;
+
 		sector = rbio_stripe_sector(rbio, failb, sector_nr);
 		sector->uptodate = 1;
 	}
@@ -1818,7 +1897,7 @@ pstripe:
 cleanup:
 	for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
 		kunmap_local(unmap_array[stripe_nr]);
-	return 0;
+	return ret;
 }
 
 static int recover_sectors(struct btrfs_raid_bio *rbio)
@@ -2115,7 +2194,7 @@ no_csum:
 	rbio->csum_bitmap = NULL;
 }
 
-static int rmw_read_and_wait(struct btrfs_raid_bio *rbio)
+static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
 {
 	struct bio_list bio_list;
 	struct bio *bio;
@@ -2136,6 +2215,12 @@ static int rmw_read_and_wait(struct btrfs_raid_bio *rbio)
 
 	submit_read_bios(rbio, &bio_list);
 	wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+
+	/*
+	 * We may or may not have any corrupted sectors (including missing dev
+	 * and csum mismatch), just let recover_sectors() handle them all.
+	 */
+	ret = recover_sectors(rbio);
 	return ret;
 out:
 	while ((bio = bio_list_pop(&bio_list)))
@@ -2175,6 +2260,28 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
 	}
 }
 
+/*
+ * To determine if we need to read any sector from the disk.
+ * Should only be utilized in RMW path, to skip cached rbio.
+ */
+static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
+{
+	int i;
+
+	for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) {
+		struct sector_ptr *sector = &rbio->stripe_sectors[i];
+
+		/*
+		 * We have a sector which doesn't have page nor uptodate,
+		 * thus this rbio can not be cached one, as cached one must
+		 * have all its data sectors present and uptodate.
+		 */
+		if (!sector->page || !sector->uptodate)
+			return true;
+	}
+	return false;
+}
+
 static int rmw_rbio(struct btrfs_raid_bio *rbio)
 {
 	struct bio_list bio_list;
@@ -2189,9 +2296,13 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
 	if (ret < 0)
 		return ret;
 
-	/* Full stripe write, can write the full stripe right now. */
-	if (rbio_is_full(rbio))
+	/*
+	 * Either full stripe write, or we have every data sector already
+	 * cached, can go to write path immediately.
+	 */
+	if (rbio_is_full(rbio) || !need_read_stripe_sectors(rbio))
 		goto write;
+
 	/*
 	 * Now we're doing sub-stripe write, also need all data stripes to do
 	 * the full RMW.
@@ -2202,16 +2313,10 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
 
 	index_rbio_pages(rbio);
 
-	ret = rmw_read_and_wait(rbio);
+	ret = rmw_read_wait_recover(rbio);
 	if (ret < 0)
 		return ret;
 
-	/* We have read errors, try recovery path. */
-	if (!bitmap_empty(rbio->error_bitmap, rbio->nr_sectors)) {
-		ret = recover_rbio(rbio);
-		if (ret < 0)
-			return ret;
-	}
 write:
 	/*
 	 * At this stage we're not allowed to add any new bios to the