diff options
Diffstat (limited to 'fs/btrfs/subpage.c')
-rw-r--r-- | fs/btrfs/subpage.c | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index c69049e7daa9..2d19089ab625 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -4,6 +4,64 @@ #include "ctree.h" #include "subpage.h" +/* + * Subpage (sectorsize < PAGE_SIZE) support overview: + * + * Limitations: + * + * - Only support 64K page size for now + * This is to make metadata handling easier, as 64K page would ensure + * all nodesize would fit inside one page, thus we don't need to handle + * cases where a tree block crosses several pages. + * + * - Only metadata read-write for now + * The data read-write part is in development. + * + * - Metadata can't cross 64K page boundary + * btrfs-progs and kernel have done that for a while, thus only ancient + * filesystems could have such problem. For such case, do a graceful + * rejection. + * + * Special behavior: + * + * - Metadata + * Metadata read is fully supported. + * Meaning when reading one tree block will only trigger the read for the + * needed range, other unrelated range in the same page will not be touched. + * + * Metadata write support is partial. + * The writeback is still for the full page, but we will only submit + * the dirty extent buffers in the page. + * + * This means, if we have a metadata page like this: + * + * Page offset + * 0 16K 32K 48K 64K + * |/////////| |///////////| + * \- Tree block A \- Tree block B + * + * Even if we just want to writeback tree block A, we will also writeback + * tree block B if it's also dirty. + * + * This may cause extra metadata writeback which results more COW. + * + * Implementation: + * + * - Common + * Both metadata and data will use a new structure, btrfs_subpage, to + * record the status of each sector inside a page. This provides the extra + * granularity needed. + * + * - Metadata + * Since we have multiple tree blocks inside one page, we can't rely on page + * locking anymore, or we will have greatly reduced concurrency or even + * deadlocks (hold one tree lock while trying to lock another tree lock in + * the same page). + * + * Thus for metadata locking, subpage support relies on io_tree locking only. + * This means a slightly higher tree locking latency. + */ + int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, struct page *page, enum btrfs_subpage_type type) { @@ -220,6 +278,82 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info, spin_unlock_irqrestore(&subpage->lock, flags); } +void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned long flags; + + spin_lock_irqsave(&subpage->lock, flags); + subpage->dirty_bitmap |= tmp; + spin_unlock_irqrestore(&subpage->lock, flags); + set_page_dirty(page); +} + +/* + * Extra clear_and_test function for subpage dirty bitmap. + * + * Return true if we're the last bits in the dirty_bitmap and clear the + * dirty_bitmap. + * Return false otherwise. + * + * NOTE: Callers should manually clear page dirty for true case, as we have + * extra handling for tree blocks. + */ +bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned long flags; + bool last = false; + + spin_lock_irqsave(&subpage->lock, flags); + subpage->dirty_bitmap &= ~tmp; + if (subpage->dirty_bitmap == 0) + last = true; + spin_unlock_irqrestore(&subpage->lock, flags); + return last; +} + +void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + bool last; + + last = btrfs_subpage_clear_and_test_dirty(fs_info, page, start, len); + if (last) + clear_page_dirty_for_io(page); +} + +void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned long flags; + + spin_lock_irqsave(&subpage->lock, flags); + subpage->writeback_bitmap |= tmp; + set_page_writeback(page); + spin_unlock_irqrestore(&subpage->lock, flags); +} + +void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned long flags; + + spin_lock_irqsave(&subpage->lock, flags); + subpage->writeback_bitmap &= ~tmp; + if (subpage->writeback_bitmap == 0) + end_page_writeback(page); + spin_unlock_irqrestore(&subpage->lock, flags); +} + /* * Unlike set/clear which is dependent on each page status, for test all bits * are tested in the same way. @@ -240,6 +374,8 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ } IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error); +IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); +IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); /* * Note that, in selftests (extent-io-tests), we can have empty fs_info passed @@ -276,3 +412,7 @@ bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info, \ IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate, PageUptodate); IMPLEMENT_BTRFS_PAGE_OPS(error, SetPageError, ClearPageError, PageError); +IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io, + PageDirty); +IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback, + PageWriteback); |