author     Linus Torvalds <torvalds@linux-foundation.org>   2022-10-11 02:53:04 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-10-11 02:53:04 +0200
commit     27bc50fc90647bbf7b734c3fc306a5e61350da53 (patch)
tree       75fc525fbfec8c07a97a7875a89592317bcad4ca /mm/filemap.c
parent     Merge tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/... (diff)
parent     hugetlb: allocate vma lock for all sharable vmas (diff)
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- Yu Zhao's Multi-Gen LRU patches are here. They've been under test in
linux-next for a couple of months without, to my knowledge, any
negative reports (or any positive ones, come to that).
- Also the Maple Tree from Liam Howlett. An overlapping range-based
tree for vmas. It is apparently slightly more efficient in its own
right, but is mainly targeted at enabling work to reduce mmap_lock
contention.
Liam has identified a number of other tree users in the kernel which
could be beneficially converted to maple trees (see the sketch just
after this list).
Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat
at [1]. This has yet to be addressed due to Liam's unfortunately
timed vacation. He is now back and we'll get this fixed up.
- Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses
clang-generated instrumentation to detect use-of-uninitialized bugs down
to the single-bit level (an example of this class of bug appears after
this list).
KMSAN keeps finding bugs. New ones, as well as the legacy ones.
- Yang Shi adds a userspace mechanism (madvise) to induce a collapse of
memory into THPs.
- Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to
support file/shmem-backed pages (a small userspace example follows the
commit list below).
- userfaultfd updates from Axel Rasmussen
- zsmalloc cleanups from Alexey Romanov
- cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and
memory-failure
- Huang Ying adds enhancements to NUMA balancing memory tiering mode's
page promotion, with a new way of detecting hot pages.
- memcg updates from Shakeel Butt: charging optimizations and reduced
memory consumption.
- memcg cleanups from Kairui Song.
- memcg fixes and cleanups from Johannes Weiner.
- Vishal Moola provides more folio conversions
- Zhang Yi removed ll_rw_block() :(
- migration enhancements from Peter Xu
- migration error-path bugfixes from Huang Ying
- Aneesh Kumar added ability for a device driver to alter the memory
tiering promotion paths. For optimizations by PMEM drivers, DRM
drivers, etc.
- vma merging improvements from Jakub Matěna.
- NUMA hinting cleanups from David Hildenbrand.
- xu xin added additional userspace visibility into KSM merging
activity.
- THP & KSM code consolidation from Qi Zheng.
- more folio work from Matthew Wilcox.
- KASAN updates from Andrey Konovalov.
- DAMON cleanups from Kaixu Xia.
- DAMON work from SeongJae Park: fixes, cleanups.
- hugetlb sysfs cleanups from Muchun Song.
- Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core.
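As a rough illustration of the maple tree API this series introduces, here is a
minimal sketch; the example_mt tree and example_store_and_load() function are
hypothetical names for illustration, not code from the merged patches.

#include <linux/maple_tree.h>
#include <linux/gfp.h>
#include <linux/bug.h>

/* DEFINE_MTREE() declares an empty, statically initialised maple tree. */
static DEFINE_MTREE(example_mt);

static int example_store_and_load(void *item)
{
        int ret;

        /* Associate the index range [0x1000, 0x1fff] with item. */
        ret = mtree_store_range(&example_mt, 0x1000, 0x1fff, item, GFP_KERNEL);
        if (ret)
                return ret;

        /* Any index inside a stored range maps back to the same entry. */
        WARN_ON(mtree_load(&example_mt, 0x1234) != item);

        mtree_destroy(&example_mt);
        return 0;
}

In the merged code each mm's VMAs live in such a tree (mm->mm_mt), keyed by
their virtual address ranges, which is what the mmap_lock-contention work
builds on.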
Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1]
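To make the KMSAN item above concrete, the sketch below shows the general
class of bug it reports; example_setup() and struct opts are made-up names,
not kernel code.

#include <linux/errno.h>

struct opts {
        int mode;
        int flags;
};

static int example_setup(void)
{
        struct opts o;

        o.mode = 1;     /* o.flags is never written */

        /*
         * KMSAN would report this branch: the condition depends on an
         * uninitialized value, tracked down to the individual bits read.
         */
        if (o.flags & 0x1)
                return -EINVAL;

        return 0;
}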
* tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits)
hugetlb: allocate vma lock for all sharable vmas
hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer
hugetlb: fix vma lock handling during split vma and range unmapping
mglru: mm/vmscan.c: fix imprecise comments
mm/mglru: don't sync disk for each aging cycle
mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol
mm: memcontrol: use do_memsw_account() in a few more places
mm: memcontrol: deprecate swapaccounting=0 mode
mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled
mm/secretmem: remove reduntant return value
mm/hugetlb: add available_huge_pages() func
mm: remove unused inline functions from include/linux/mm_inline.h
selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory
selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd
selftests/vm: add thp collapse shmem testing
selftests/vm: add thp collapse file and tmpfs testing
selftests/vm: modularize thp collapse memory operations
selftests/vm: dedup THP helpers
mm/khugepaged: add tracepoint to hpage_collapse_scan_file()
mm/madvise: add file and shmem support to MADV_COLLAPSE
...
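A minimal userspace sketch of the MADV_COLLAPSE advice summarised above; the
buffer size, alignment and the fallback definition of the constant are
assumptions for illustration, and the call is best-effort (it can fail
depending on THP configuration and memory availability).

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25        /* uapi value assumed here for older headers */
#endif

int main(void)
{
        size_t len = 4UL << 20;         /* 4 MiB, a multiple of the 2 MiB THP size */
        void *buf;

        if (posix_memalign(&buf, 2UL << 20, len))
                return 1;

        /* Fault the range in so there is something to collapse. */
        for (size_t i = 0; i < len; i += 4096)
                ((char *)buf)[i] = 1;

        /* Ask the kernel to back the range with transparent huge pages. */
        if (madvise(buf, len, MADV_COLLAPSE))
                perror("madvise(MADV_COLLAPSE)");

        free(buf);
        return 0;
}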
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  124
1 file changed, 66 insertions, 58 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index c943d1b90cc2..08341616ae7a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -632,22 +632,23 @@ bool filemap_range_has_writeback(struct address_space *mapping,
 {
         XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
         pgoff_t max = end_byte >> PAGE_SHIFT;
-        struct page *page;
+        struct folio *folio;
 
         if (end_byte < start_byte)
                 return false;
 
         rcu_read_lock();
-        xas_for_each(&xas, page, max) {
-                if (xas_retry(&xas, page))
+        xas_for_each(&xas, folio, max) {
+                if (xas_retry(&xas, folio))
                         continue;
-                if (xa_is_value(page))
+                if (xa_is_value(folio))
                         continue;
-                if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
+                if (folio_test_dirty(folio) || folio_test_locked(folio) ||
+                                folio_test_writeback(folio))
                         break;
         }
         rcu_read_unlock();
-        return page != NULL;
+        return folio != NULL;
 }
 EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
 
@@ -1221,15 +1222,12 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
         struct wait_page_queue wait_page;
         wait_queue_entry_t *wait = &wait_page.wait;
         bool thrashing = false;
-        bool delayacct = false;
         unsigned long pflags;
+        bool in_thrashing;
 
         if (bit_nr == PG_locked &&
             !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
-                if (!folio_test_swapbacked(folio)) {
-                        delayacct_thrashing_start();
-                        delayacct = true;
-                }
+                delayacct_thrashing_start(&in_thrashing);
                 psi_memstall_enter(&pflags);
                 thrashing = true;
         }
@@ -1329,8 +1327,7 @@ repeat:
         finish_wait(q, wait);
 
         if (thrashing) {
-                if (delayacct)
-                        delayacct_thrashing_end();
+                delayacct_thrashing_end(&in_thrashing);
                 psi_memstall_leave(&pflags);
         }
 
@@ -1378,17 +1375,14 @@ void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
         struct wait_page_queue wait_page;
         wait_queue_entry_t *wait = &wait_page.wait;
         bool thrashing = false;
-        bool delayacct = false;
         unsigned long pflags;
+        bool in_thrashing;
         wait_queue_head_t *q;
         struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
 
         q = folio_waitqueue(folio);
         if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
-                if (!folio_test_swapbacked(folio)) {
-                        delayacct_thrashing_start();
-                        delayacct = true;
-                }
+                delayacct_thrashing_start(&in_thrashing);
                 psi_memstall_enter(&pflags);
                 thrashing = true;
         }
@@ -1435,8 +1429,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
         finish_wait(q, wait);
 
         if (thrashing) {
-                if (delayacct)
-                        delayacct_thrashing_end();
+                delayacct_thrashing_end(&in_thrashing);
                 psi_memstall_leave(&pflags);
         }
 }
@@ -1467,7 +1460,7 @@ EXPORT_SYMBOL(folio_wait_bit_killable);
  *
  * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal.
  */
-int folio_put_wait_locked(struct folio *folio, int state)
+static int folio_put_wait_locked(struct folio *folio, int state)
 {
         return folio_wait_bit_common(folio, PG_locked, state, DROP);
 }
@@ -1633,24 +1626,26 @@ EXPORT_SYMBOL(folio_end_writeback);
  */
 void page_endio(struct page *page, bool is_write, int err)
 {
+        struct folio *folio = page_folio(page);
+
         if (!is_write) {
                 if (!err) {
-                        SetPageUptodate(page);
+                        folio_mark_uptodate(folio);
                 } else {
-                        ClearPageUptodate(page);
-                        SetPageError(page);
+                        folio_clear_uptodate(folio);
+                        folio_set_error(folio);
                 }
-                unlock_page(page);
+                folio_unlock(folio);
         } else {
                 if (err) {
                         struct address_space *mapping;
 
-                        SetPageError(page);
-                        mapping = page_mapping(page);
+                        folio_set_error(folio);
+                        mapping = folio_mapping(folio);
                         if (mapping)
                                 mapping_set_error(mapping, err);
                 }
-                end_page_writeback(page);
+                folio_end_writeback(folio);
         }
 }
 EXPORT_SYMBOL_GPL(page_endio);
@@ -2195,30 +2190,31 @@ bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
 }
 
 /**
- * find_get_pages_contig - gang contiguous pagecache lookup
+ * filemap_get_folios_contig - Get a batch of contiguous folios
  * @mapping: The address_space to search
- * @index: The starting page index
- * @nr_pages: The maximum number of pages
- * @pages: Where the resulting pages are placed
+ * @start: The starting page index
+ * @end: The final page index (inclusive)
+ * @fbatch: The batch to fill
  *
- * find_get_pages_contig() works exactly like find_get_pages_range(),
- * except that the returned number of pages are guaranteed to be
- * contiguous.
+ * filemap_get_folios_contig() works exactly like filemap_get_folios(),
+ * except the returned folios are guaranteed to be contiguous. This may
+ * not return all contiguous folios if the batch gets filled up.
  *
- * Return: the number of pages which were found.
+ * Return: The number of folios found.
+ * Also update @start to be positioned for traversal of the next folio.
  */
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
-                        unsigned int nr_pages, struct page **pages)
+
+unsigned filemap_get_folios_contig(struct address_space *mapping,
+                pgoff_t *start, pgoff_t end, struct folio_batch *fbatch)
 {
-        XA_STATE(xas, &mapping->i_pages, index);
+        XA_STATE(xas, &mapping->i_pages, *start);
+        unsigned long nr;
         struct folio *folio;
-        unsigned int ret = 0;
-
-        if (unlikely(!nr_pages))
-                return 0;
 
         rcu_read_lock();
-        for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+
+        for (folio = xas_load(&xas); folio && xas.xa_index <= end;
+                        folio = xas_next(&xas)) {
                 if (xas_retry(&xas, folio))
                         continue;
                 /*
@@ -2226,33 +2222,45 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                  * No current caller is looking for DAX entries.
                  */
                 if (xa_is_value(folio))
-                        break;
+                        goto update_start;
 
                 if (!folio_try_get_rcu(folio))
                         goto retry;
 
                 if (unlikely(folio != xas_reload(&xas)))
-                        goto put_page;
+                        goto put_folio;
 
-again:
-                pages[ret] = folio_file_page(folio, xas.xa_index);
-                if (++ret == nr_pages)
-                        break;
-                if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
-                        xas.xa_index++;
-                        folio_ref_inc(folio);
-                        goto again;
+                if (!folio_batch_add(fbatch, folio)) {
+                        nr = folio_nr_pages(folio);
+
+                        if (folio_test_hugetlb(folio))
+                                nr = 1;
+                        *start = folio->index + nr;
+                        goto out;
                 }
                 continue;
-put_page:
+put_folio:
                 folio_put(folio);
+
retry:
                 xas_reset(&xas);
         }
+
+update_start:
+        nr = folio_batch_count(fbatch);
+
+        if (nr) {
+                folio = fbatch->folios[nr - 1];
+                if (folio_test_hugetlb(folio))
+                        *start = folio->index + 1;
+                else
+                        *start = folio->index + folio_nr_pages(folio);
+        }
+out:
         rcu_read_unlock();
-        return ret;
+        return folio_batch_count(fbatch);
 }
-EXPORT_SYMBOL(find_get_pages_contig);
+EXPORT_SYMBOL(filemap_get_folios_contig);
 
 /**
  * find_get_pages_range_tag - Find and return head pages matching @tag.
@@ -3719,7 +3727,7 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
                 unsigned long offset;   /* Offset into pagecache page */
                 unsigned long bytes;    /* Bytes to write to page */
                 size_t copied;          /* Bytes copied from user */
-                void *fsdata;
+                void *fsdata = NULL;
 
                 offset = (pos & (PAGE_SIZE - 1));
                 bytes = min_t(unsigned long, PAGE_SIZE - offset,
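To show how the new filemap_get_folios_contig() above might be driven, here is
a hedged caller sketch; walk_contig_range() is a hypothetical wrapper, not part
of the patch. The helper takes a reference on each folio it returns, so the
batch must be released.

#include <linux/pagemap.h>
#include <linux/pagevec.h>

/* Walk the contiguous run of folios starting at @start, batch by batch. */
static void walk_contig_range(struct address_space *mapping,
                              pgoff_t start, pgoff_t end)
{
        struct folio_batch fbatch;
        unsigned int i, nr;

        folio_batch_init(&fbatch);
        /* @start is advanced past the last folio returned on each call. */
        while ((nr = filemap_get_folios_contig(mapping, &start, end, &fbatch))) {
                for (i = 0; i < nr; i++) {
                        struct folio *folio = fbatch.folios[i];

                        /* ... operate on the referenced folio ... */
                }
                /* Drop the references taken by filemap_get_folios_contig(). */
                folio_batch_release(&fbatch);
        }
}

The loop ends when the run hits a hole in the page cache or passes @end, which
matches the "contiguous" guarantee described in the kernel-doc above.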