diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 16:47:59 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 16:47:59 +0100 |
commit | 49f8275c7d9247cf1dd4440fc8162f784252c849 (patch) | |
tree | 7caefaa8b68d3162f60ecef7bafacbed0e1056d8 /mm/migrate.c | |
parent | Linux 5.15 (diff) | |
parent | mm/writeback: Add folio_write_one (diff) | |
download | linux-49f8275c7d9247cf1dd4440fc8162f784252c849.tar.xz linux-49f8275c7d9247cf1dd4440fc8162f784252c849.zip |
Merge tag 'folio-5.16' of git://git.infradead.org/users/willy/pagecache
Pull memory folios from Matthew Wilcox:
"Add memory folios, a new type to represent either order-0 pages or the
head page of a compound page. This should be enough infrastructure to
support filesystems converting from pages to folios.
The point of all this churn is to allow filesystems and the page cache
to manage memory in larger chunks than PAGE_SIZE. The original plan
was to use compound pages like THP does, but I ran into problems with
some functions expecting only a head page while others expect the
precise page containing a particular byte.
The folio type allows a function to declare that it's expecting only a
head page. Almost incidentally, this allows us to remove various calls
to VM_BUG_ON(PageTail(page)) and compound_head().
This converts just parts of the core MM and the page cache. For 5.17,
we intend to convert various filesystems (XFS and AFS are ready; other
filesystems may make it) and also convert more of the MM and page
cache to folios. For 5.18, multi-page folios should be ready.
The multi-page folios offer some improvement to some workloads. The
80% win is real, but appears to be an artificial benchmark (postgres
startup, which isn't a serious workload). Real workloads (eg building
the kernel, running postgres in a steady state, etc) seem to benefit
between 0-10%. I haven't heard of any performance losses as a result
of this series. Nobody has done any serious performance tuning; I
imagine that tweaking the readahead algorithm could provide some more
interesting wins. There are also other places where we could choose to
create large folios and currently do not, such as writes that are
larger than PAGE_SIZE.
I'd like to thank all my reviewers who've offered review/ack tags:
Christoph Hellwig, David Howells, Jan Kara, Jeff Layton, Johannes
Weiner, Kirill A. Shutemov, Michal Hocko, Mike Rapoport, Vlastimil
Babka, William Kucharski, Yu Zhao and Zi Yan.
I'd also like to thank those who gave feedback I incorporated but
haven't offered up review tags for this part of the series: Nick
Piggin, Mel Gorman, Ming Lei, Darrick Wong, Ted Ts'o, John Hubbard,
Hugh Dickins, and probably a few others who I forget"
* tag 'folio-5.16' of git://git.infradead.org/users/willy/pagecache: (90 commits)
mm/writeback: Add folio_write_one
mm/filemap: Add FGP_STABLE
mm/filemap: Add filemap_get_folio
mm/filemap: Convert mapping_get_entry to return a folio
mm/filemap: Add filemap_add_folio()
mm/filemap: Add filemap_alloc_folio
mm/page_alloc: Add folio allocation functions
mm/lru: Add folio_add_lru()
mm/lru: Convert __pagevec_lru_add_fn to take a folio
mm: Add folio_evictable()
mm/workingset: Convert workingset_refault() to take a folio
mm/filemap: Add readahead_folio()
mm/filemap: Add folio_mkwrite_check_truncate()
mm/filemap: Add i_blocks_per_folio()
mm/writeback: Add folio_redirty_for_writepage()
mm/writeback: Add folio_account_redirty()
mm/writeback: Add folio_clear_dirty_for_io()
mm/writeback: Add folio_cancel_dirty()
mm/writeback: Add folio_account_cleaned()
mm/writeback: Add filemap_dirty_folio()
...
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 189 |
1 files changed, 94 insertions, 95 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 1852d787e6ab..efa9941ebe03 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -364,7 +364,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += thp_nr_pages(page) + page_has_private(page); + expected_count += compound_nr(page) + page_has_private(page); return expected_count; } @@ -377,74 +377,75 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, page) + extra_count; - int nr = thp_nr_pages(page); + int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + long nr = folio_nr_pages(folio); if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != expected_count) + if (folio_ref_count(folio) != expected_count) return -EAGAIN; /* No turning back from here */ - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - __SetPageSwapBacked(newpage); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + if (folio_test_swapbacked(folio)) + __folio_set_swapbacked(newfolio); return MIGRATEPAGE_SUCCESS; } - oldzone = page_zone(page); - newzone = page_zone(newpage); + oldzone = folio_zone(folio); + newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (page_count(page) != expected_count || xas_load(&xas) != page) { + if (folio_ref_count(folio) != expected_count || + xas_load(&xas) != folio) { xas_unlock_irq(&xas); return -EAGAIN; } - if (!page_ref_freeze(page, expected_count)) { + if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } /* - * Now we know that no one else is looking at the page: + * Now we know that no one else is looking at the folio: * no turning back from here. */ - newpage->index = page->index; - newpage->mapping = page->mapping; - page_ref_add(newpage, nr); /* add cache reference */ - if (PageSwapBacked(page)) { - __SetPageSwapBacked(newpage); - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + folio_ref_add(newfolio, nr); /* add cache reference */ + if (folio_test_swapbacked(folio)) { + __folio_set_swapbacked(newfolio); + if (folio_test_swapcache(folio)) { + folio_set_swapcache(newfolio); + newfolio->private = folio_get_private(folio); } } else { - VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); } /* Move dirty while page refs frozen and newpage not yet exposed */ - dirty = PageDirty(page); + dirty = folio_test_dirty(folio); if (dirty) { - ClearPageDirty(page); - SetPageDirty(newpage); + folio_clear_dirty(folio); + folio_set_dirty(newfolio); } - xas_store(&xas, newpage); - if (PageTransHuge(page)) { + xas_store(&xas, newfolio); + if (nr > 1) { int i; for (i = 1; i < nr; i++) { xas_next(&xas); - xas_store(&xas, newpage); + xas_store(&xas, newfolio); } } @@ -453,7 +454,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - nr); + folio_ref_unfreeze(folio, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -472,18 +473,18 @@ int migrate_page_move_mapping(struct address_space *mapping, struct lruvec *old_lruvec, *new_lruvec; struct mem_cgroup *memcg; - memcg = page_memcg(page); + memcg = folio_memcg(folio); old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); - if (PageSwapBacked(page) && !PageSwapCache(page)) { + if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } #ifdef CONFIG_SWAP - if (PageSwapCache(page)) { + if (folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); } @@ -499,11 +500,11 @@ int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page_move_mapping); +EXPORT_SYMBOL(folio_migrate_mapping); /* * The expected number of remaining references is the same as that - * of migrate_page_move_mapping(). + * of folio_migrate_mapping(). */ int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) @@ -538,91 +539,87 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* - * Copy the page to its new location + * Copy the flags and some other ancillary information */ -void migrate_page_states(struct page *newpage, struct page *page) +void folio_migrate_flags(struct folio *newfolio, struct folio *folio) { int cpupid; - if (PageError(page)) - SetPageError(newpage); - if (PageReferenced(page)) - SetPageReferenced(newpage); - if (PageUptodate(page)) - SetPageUptodate(newpage); - if (TestClearPageActive(page)) { - VM_BUG_ON_PAGE(PageUnevictable(page), page); - SetPageActive(newpage); - } else if (TestClearPageUnevictable(page)) - SetPageUnevictable(newpage); - if (PageWorkingset(page)) - SetPageWorkingset(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); - if (PageMappedToDisk(page)) - SetPageMappedToDisk(newpage); - - /* Move dirty on pages not done by migrate_page_move_mapping() */ - if (PageDirty(page)) - SetPageDirty(newpage); - - if (page_is_young(page)) - set_page_young(newpage); - if (page_is_idle(page)) - set_page_idle(newpage); + if (folio_test_error(folio)) + folio_set_error(newfolio); + if (folio_test_referenced(folio)) + folio_set_referenced(newfolio); + if (folio_test_uptodate(folio)) + folio_mark_uptodate(newfolio); + if (folio_test_clear_active(folio)) { + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + folio_set_active(newfolio); + } else if (folio_test_clear_unevictable(folio)) + folio_set_unevictable(newfolio); + if (folio_test_workingset(folio)) + folio_set_workingset(newfolio); + if (folio_test_checked(folio)) + folio_set_checked(newfolio); + if (folio_test_mappedtodisk(folio)) + folio_set_mappedtodisk(newfolio); + + /* Move dirty on pages not done by folio_migrate_mapping() */ + if (folio_test_dirty(folio)) + folio_set_dirty(newfolio); + + if (folio_test_young(folio)) + folio_set_young(newfolio); + if (folio_test_idle(folio)) + folio_set_idle(newfolio); /* * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. */ - cpupid = page_cpupid_xchg_last(page, -1); - page_cpupid_xchg_last(newpage, cpupid); + cpupid = page_cpupid_xchg_last(&folio->page, -1); + page_cpupid_xchg_last(&newfolio->page, cpupid); - ksm_migrate_page(newpage, page); + folio_migrate_ksm(newfolio, folio); /* * Please do not reorder this without considering how mm/ksm.c's * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). */ - if (PageSwapCache(page)) - ClearPageSwapCache(page); - ClearPagePrivate(page); + if (folio_test_swapcache(folio)) + folio_clear_swapcache(folio); + folio_clear_private(folio); /* page->private contains hugetlb specific flags */ - if (!PageHuge(page)) - set_page_private(page, 0); + if (!folio_test_hugetlb(folio)) + folio->private = NULL; /* * If any waiters have accumulated on the new page then * wake them up. */ - if (PageWriteback(newpage)) - end_page_writeback(newpage); + if (folio_test_writeback(newfolio)) + folio_end_writeback(newfolio); /* * PG_readahead shares the same bit with PG_reclaim. The above * end_page_writeback() may clear PG_readahead mistakenly, so set the * bit after that. */ - if (PageReadahead(page)) - SetPageReadahead(newpage); + if (folio_test_readahead(folio)) + folio_set_readahead(newfolio); - copy_page_owner(page, newpage); + folio_copy_owner(newfolio, folio); - if (!PageHuge(page)) - mem_cgroup_migrate(page, newpage); + if (!folio_test_hugetlb(folio)) + mem_cgroup_migrate(folio, newfolio); } -EXPORT_SYMBOL(migrate_page_states); +EXPORT_SYMBOL(folio_migrate_flags); -void migrate_page_copy(struct page *newpage, struct page *page) +void folio_migrate_copy(struct folio *newfolio, struct folio *folio) { - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - - migrate_page_states(newpage, page); + folio_copy(newfolio, folio); + folio_migrate_flags(newfolio, folio); } -EXPORT_SYMBOL(migrate_page_copy); +EXPORT_SYMBOL(folio_migrate_copy); /************************************************************ * Migration functions @@ -638,19 +635,21 @@ int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { + struct folio *newfolio = page_folio(newpage); + struct folio *folio = page_folio(page); int rc; - BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, newfolio, folio, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(newfolio, folio); else - migrate_page_states(newpage, page); + folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); @@ -2468,7 +2467,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * @page: struct page to check * * Pinned pages cannot be migrated. This is the same test as in - * migrate_page_move_mapping(), except that here we allow migration of a + * folio_migrate_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ static bool migrate_vma_check_page(struct page *page) @@ -2846,7 +2845,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (unlikely(anon_vma_prepare(vma))) goto abort; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto abort; /* |