From b67bf49ce7aae72f63739abee6ac25f64bf20081 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 14 Feb 2022 18:21:52 -0800 Subject: mm/munlock: delete FOLL_MLOCK and FOLL_POPULATE If counting page mlocks, we must not double-count: follow_page_pte() can tell if a page has already been Mlocked or not, but cannot tell if a pte has already been counted or not: that will have to be done when the pte is mapped in (which lru_cache_add_inactive_or_unevictable() already tracks for new anon pages, but there's no such tracking yet for others). Delete all the FOLL_MLOCK code - faulting in the missing pages will do all that is necessary, without special mlock_vma_page() calls from here. But then FOLL_POPULATE turns out to serve no purpose - it was there so that its absence would tell faultin_page() not to faultin page when setting up VM_LOCKONFAULT areas; but if there's no special work needed here for mlock, then there's no work at all here for VM_LOCKONFAULT. Have I got that right? I've not looked into the history, but see that FOLL_POPULATE goes back before VM_LOCKONFAULT: did it serve a different purpose before? Ah, yes, it was used to skip the old stack guard page. And is it intentional that COW is not broken on existing pages when setting up a VM_LOCKONFAULT area? I can see that being argued either way, and have no reason to disagree with current behaviour. Signed-off-by: Hugh Dickins Acked-by: Vlastimil Babka Signed-off-by: Matthew Wilcox (Oracle) --- mm/gup.c | 43 ++++++++----------------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index a9d4d724aef7..87fec8a5c10d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -597,32 +597,6 @@ retry: */ mark_page_accessed(page); } - if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { - /* Do not mlock pte-mapped THP */ - if (PageTransCompound(page)) - goto out; - - /* - * The preliminary mapping check is mainly to avoid the - * pointless overhead of lock_page on the ZERO_PAGE - * which might bounce very badly if there is contention. - * - * If the page is already locked, we don't need to - * handle it now - vmscan will handle it later if and - * when it attempts to reclaim the page. - */ - if (page->mapping && trylock_page(page)) { - lru_add_drain(); /* push cached pages to LRU */ - /* - * Because we lock page here, and migration is - * blocked by the pte's page reference, and we - * know the page is still mapped, we don't even - * need to check for file-cache page truncation. - */ - mlock_vma_page(page); - unlock_page(page); - } - } out: pte_unmap_unlock(ptep, ptl); return page; @@ -945,9 +919,6 @@ static int faultin_page(struct vm_area_struct *vma, unsigned int fault_flags = 0; vm_fault_t ret; - /* mlock all present pages, but do not fault in new pages */ - if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) - return -ENOENT; if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) @@ -1198,8 +1169,6 @@ retry: case -ENOMEM: case -EHWPOISON: goto out; - case -ENOENT: - goto next_page; } BUG(); } else if (PTR_ERR(page) == -EEXIST) { @@ -1497,9 +1466,14 @@ long populate_vma_page_range(struct vm_area_struct *vma, VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); - gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK; + /* + * Rightly or wrongly, the VM_LOCKONFAULT case has never used + * faultin_page() to break COW, so it has no work to do here. 
+ */ if (vma->vm_flags & VM_LOCKONFAULT) - gup_flags &= ~FOLL_POPULATE; + return nr_pages; + + gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW @@ -1566,10 +1540,9 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, * in the page table. * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit * a poisoned page. - * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT. * !FOLL_FORCE: Require proper access permissions. */ - gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON; + gup_flags = FOLL_TOUCH | FOLL_HWPOISON; if (write) gup_flags |= FOLL_WRITE; -- cgit v1.2.3 From f9f38f78c5d5eef3717b48d84263b4b46ee0110a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:37 +1100 Subject: mm: refactor check_and_migrate_movable_pages Remove up to two levels of indentation by using continue statements and move variables to local scope where possible. Link: https://lkml.kernel.org/r/20220210072828.2930359-11-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Christian Knig Cc: Dan Williams Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Karol Herbst Cc: Logan Gunthorpe Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/gup.c | 81 +++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 87fec8a5c10d..e54359ef75ac 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1841,72 +1841,79 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { - unsigned long i; - unsigned long isolation_error_count = 0; - bool drain_allow = true; - LIST_HEAD(movable_page_list); - long ret = 0; + unsigned long isolation_error_count = 0, i; struct page *prev_head = NULL; - struct page *head; - struct migration_target_control mtc = { - .nid = NUMA_NO_NODE, - .gfp_mask = GFP_USER | __GFP_NOWARN, - }; + LIST_HEAD(movable_page_list); + bool drain_allow = true; + int ret = 0; for (i = 0; i < nr_pages; i++) { - head = compound_head(pages[i]); + struct page *head = compound_head(pages[i]); + if (head == prev_head) continue; prev_head = head; + + if (is_pinnable_page(head)) + continue; + /* - * If we get a movable page, since we are going to be pinning - * these entries, try to move them out if possible. + * Try to move out any movable page before pinning the range. 
*/ - if (!is_pinnable_page(head)) { - if (PageHuge(head)) { - if (!isolate_huge_page(head, &movable_page_list)) - isolation_error_count++; - } else { - if (!PageLRU(head) && drain_allow) { - lru_add_drain_all(); - drain_allow = false; - } + if (PageHuge(head)) { + if (!isolate_huge_page(head, &movable_page_list)) + isolation_error_count++; + continue; + } - if (isolate_lru_page(head)) { - isolation_error_count++; - continue; - } - list_add_tail(&head->lru, &movable_page_list); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + - page_is_file_lru(head), - thp_nr_pages(head)); - } + if (!PageLRU(head) && drain_allow) { + lru_add_drain_all(); + drain_allow = false; + } + + if (isolate_lru_page(head)) { + isolation_error_count++; + continue; } + list_add_tail(&head->lru, &movable_page_list); + mod_node_page_state(page_pgdat(head), + NR_ISOLATED_ANON + page_is_file_lru(head), + thp_nr_pages(head)); } + if (!list_empty(&movable_page_list) || isolation_error_count) + goto unpin_pages; + /* * If list is empty, and no isolation errors, means that all pages are * in the correct zone. */ - if (list_empty(&movable_page_list) && !isolation_error_count) - return nr_pages; + return nr_pages; +unpin_pages: if (gup_flags & FOLL_PIN) { unpin_user_pages(pages, nr_pages); } else { for (i = 0; i < nr_pages; i++) put_page(pages[i]); } + if (!list_empty(&movable_page_list)) { + struct migration_target_control mtc = { + .nid = NUMA_NO_NODE, + .gfp_mask = GFP_USER | __GFP_NOWARN, + }; + ret = migrate_pages(&movable_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_LONGTERM_PIN, NULL); - if (ret && !list_empty(&movable_page_list)) - putback_movable_pages(&movable_page_list); + if (ret > 0) /* number of pages not migrated */ + ret = -ENOMEM; } - return ret > 0 ? -ENOMEM : ret; + if (ret && !list_empty(&movable_page_list)) + putback_movable_pages(&movable_page_list); + return ret; } #else static long check_and_migrate_movable_pages(unsigned long nr_pages, -- cgit v1.2.3 From 8ea2979c1444cd455ddbe7f976de79cc09fdc38d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 4 Feb 2022 09:24:26 -0500 Subject: mm/gup: Increment the page refcount before the pincount We should always increase the refcount before doing anything else to the page so that other page users see the elevated refcount first. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig --- mm/gup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index e54359ef75ac..eb7747bc7f7d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -213,25 +213,23 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) if (flags & FOLL_GET) return try_get_page(page); else if (flags & FOLL_PIN) { - int refs = 1; - page = compound_head(page); if (WARN_ON_ONCE(page_ref_count(page) <= 0)) return false; - if (hpage_pincount_available(page)) - hpage_pincount_add(page, 1); - else - refs = GUP_PIN_COUNTING_BIAS; - /* * Similar to try_grab_compound_head(): even if using the * hpage_pincount_add/_sub() routines, be sure to * *also* increment the normal page refcount field at least * once, so that the page really is pinned. 
*/ - page_ref_add(page, refs); + if (hpage_pincount_available(page)) { + page_ref_add(page, 1); + hpage_pincount_add(page, 1); + } else { + page_ref_add(page, GUP_PIN_COUNTING_BIAS); + } mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1); } -- cgit v1.2.3 From a5f100db6855dbfe2709887b7348ce727e990fb6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 Jan 2022 20:23:46 -0500 Subject: mm/gup: Remove for_each_compound_range() This macro doesn't simplify the users; it's easier to just call compound_range_next() inside the loop. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index eb7747bc7f7d..346bfcd6652b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -259,9 +259,6 @@ static inline void compound_range_next(unsigned long i, unsigned long npages, struct page *next, *page; unsigned int nr = 1; - if (i >= npages) - return; - next = *list + i; page = compound_head(next); if (PageCompound(page) && compound_order(page) >= 1) @@ -272,12 +269,6 @@ static inline void compound_range_next(unsigned long i, unsigned long npages, *ntails = nr; } -#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \ - for (__i = 0, \ - compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \ - __i < __npages; __i += __ntails, \ - compound_range_next(__i, __npages, __list, &(__head), &(__ntails))) - static inline void compound_next(unsigned long i, unsigned long npages, struct page **list, struct page **head, unsigned int *ntails) @@ -394,7 +385,8 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, struct page *head; unsigned int ntails; - for_each_compound_range(index, &page, npages, head, ntails) { + for (index = 0; index < npages; index += ntails) { + compound_range_next(index, npages, &page, &head, &ntails); if (make_dirty && !PageDirty(head)) set_page_dirty_lock(head); put_compound_head(head, ntails, FOLL_PIN); -- cgit v1.2.3 From e76027488640802633c646210781b63221c2fdd2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 Jan 2022 20:23:46 -0500 Subject: mm/gup: Remove for_each_compound_head() This macro doesn't simplify the users; it's easier to just call compound_next() inside a standard loop. 
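For context, a quick sketch (not part of the patch; do_something() is a stand-in for the loop body) of why open-coding the loop also lets the helpers drop their early "i >= npages" return: the old macro invoked compound_next() in the for-loop step clause, before the termination test, so the helper could be entered with i already past the end, while a plain loop only calls it for index < npages.

	/* Old shape: the step clause runs the helper once more after the
	 * last chunk, so compound_next() had to bail out when i >= npages.
	 */
	for (i = 0, compound_next(i, npages, pages, &head, &ntails);
	     i < npages;
	     i += ntails, compound_next(i, npages, pages, &head, &ntails))
		do_something(head, ntails);

	/* New shape: the helper is only reached while index < npages. */
	for (index = 0; index < npages; index += ntails) {
		compound_next(index, npages, pages, &head, &ntails);
		do_something(head, ntails);
	}

The same reasoning applies to compound_range_next() in the previous patch.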
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 346bfcd6652b..a88133b8e758 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -276,9 +276,6 @@ static inline void compound_next(unsigned long i, unsigned long npages, struct page *page; unsigned int nr; - if (i >= npages) - return; - page = compound_head(list[i]); for (nr = i + 1; nr < npages; nr++) { if (compound_head(list[nr]) != page) @@ -289,12 +286,6 @@ static inline void compound_next(unsigned long i, unsigned long npages, *ntails = nr - i; } -#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \ - for (__i = 0, \ - compound_next(__i, __npages, __list, &(__head), &(__ntails)); \ - __i < __npages; __i += __ntails, \ - compound_next(__i, __npages, __list, &(__head), &(__ntails))) - /** * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages * @pages: array of pages to be maybe marked dirty, and definitely released. @@ -329,7 +320,8 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, return; } - for_each_compound_head(index, pages, npages, head, ntails) { + for (index = 0; index < npages; index += ntails) { + compound_next(index, npages, pages, &head, &ntails); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key @@ -417,8 +409,10 @@ void unpin_user_pages(struct page **pages, unsigned long npages) if (WARN_ON(IS_ERR_VALUE(npages))) return; - for_each_compound_head(index, pages, npages, head, ntails) + for (index = 0; index < npages; index += ntails) { + compound_next(index, npages, pages, &head, &ntails); put_compound_head(head, ntails, FOLL_PIN); + } } EXPORT_SYMBOL(unpin_user_pages); -- cgit v1.2.3 From 8f39f5fcb7963f0a01b8077c92e627af279de65e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 Jan 2022 16:05:11 -0500 Subject: mm/gup: Change the calling convention for compound_range_next() Return the head page instead of storing it to a passed parameter. Pass the start page directly instead of passing a pointer to it. Reorder the arguments to match the calling function's arguments. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig --- mm/gup.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index a88133b8e758..32143b680a0e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -252,21 +252,20 @@ void unpin_user_page(struct page *page) } EXPORT_SYMBOL(unpin_user_page); -static inline void compound_range_next(unsigned long i, unsigned long npages, - struct page **list, struct page **head, - unsigned int *ntails) +static inline struct page *compound_range_next(struct page *start, + unsigned long npages, unsigned long i, unsigned int *ntails) { struct page *next, *page; unsigned int nr = 1; - next = *list + i; + next = start + i; page = compound_head(next); if (PageCompound(page) && compound_order(page) >= 1) nr = min_t(unsigned int, page + compound_nr(page) - next, npages - i); - *head = page; *ntails = nr; + return page; } static inline void compound_next(unsigned long i, unsigned long npages, @@ -378,7 +377,7 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, unsigned int ntails; for (index = 0; index < npages; index += ntails) { - compound_range_next(index, npages, &page, &head, &ntails); + head = compound_range_next(page, npages, index, &ntails); if (make_dirty && !PageDirty(head)) set_page_dirty_lock(head); put_compound_head(head, ntails, FOLL_PIN); -- cgit v1.2.3 From 0b046e12ae5d6d286415a2e805fcfdd724b7add1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 Jan 2022 16:21:23 -0500 Subject: mm/gup: Optimise compound_range_next() By definition, a compound page has an order >= 1, so the second half of the test was redundant. Also, this cannot be a tail page since it's the result of calling compound_head(), so use PageHead() instead of PageCompound(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 32143b680a0e..30d1daef3957 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -260,7 +260,7 @@ static inline struct page *compound_range_next(struct page *start, next = start + i; page = compound_head(next); - if (PageCompound(page) && compound_order(page) >= 1) + if (PageHead(page)) nr = min_t(unsigned int, page + compound_nr(page) - next, npages - i); -- cgit v1.2.3 From 28297dbcad7ed3d7bac373eef121339cb0cac326 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 Jan 2022 21:03:47 -0500 Subject: mm/gup: Change the calling convention for compound_next() Return the head page instead of storing it to a passed parameter. Reorder the arguments to match the calling function's arguments. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 30d1daef3957..15d5b3c6d21c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -268,9 +268,8 @@ static inline struct page *compound_range_next(struct page *start, return page; } -static inline void compound_next(unsigned long i, unsigned long npages, - struct page **list, struct page **head, - unsigned int *ntails) +static inline struct page *compound_next(struct page **list, + unsigned long npages, unsigned long i, unsigned int *ntails) { struct page *page; unsigned int nr; @@ -281,8 +280,8 @@ static inline void compound_next(unsigned long i, unsigned long npages, break; } - *head = page; *ntails = nr - i; + return page; } /** @@ -320,7 +319,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, } for (index = 0; index < npages; index += ntails) { - compound_next(index, npages, pages, &head, &ntails); + head = compound_next(pages, npages, index, &ntails); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key @@ -409,7 +408,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages) return; for (index = 0; index < npages; index += ntails) { - compound_next(index, npages, pages, &head, &ntails); + head = compound_next(pages, npages, index, &ntails); put_compound_head(head, ntails, FOLL_PIN); } } -- cgit v1.2.3 From c228afb11ac6938532703ac712992524497aff29 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 13:25:55 -0500 Subject: mm/gup: Fix some contiguous memmap assumptions Several functions in gup.c assume that a compound page has virtually contiguous page structs. This isn't true for SPARSEMEM configs unless SPARSEMEM_VMEMMAP is also set. Fix them by using nth_page() instead of plain pointer arithmetic. 
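To make the assumption concrete, a small illustration (not from the patch; head and k are placeholder names):

	/* With CONFIG_SPARSEMEM && !CONFIG_SPARSEMEM_VMEMMAP, struct pages
	 * are only virtually contiguous within a memory section, so plain
	 * pointer arithmetic can walk off the end of that section's memmap.
	 */
	struct page *bad  = head + k;           /* assumes a contiguous memmap */
	struct page *good = nth_page(head, k);  /* pfn_to_page(page_to_pfn(head) + k)
	                                           when the memmap may be discontiguous */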
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 15d5b3c6d21c..d585aa06afb2 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -258,7 +258,7 @@ static inline struct page *compound_range_next(struct page *start, struct page *next, *page; unsigned int nr = 1; - next = start + i; + next = nth_page(start, i); page = compound_head(next); if (PageHead(page)) nr = min_t(unsigned int, @@ -2440,8 +2440,8 @@ static int record_subpages(struct page *page, unsigned long addr, { int nr; - for (nr = 0; addr != end; addr += PAGE_SIZE) - pages[nr++] = page++; + for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) + pages[nr] = nth_page(page, nr); return nr; } @@ -2476,7 +2476,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); head = pte_page(pte); - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + page = nth_page(head, (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); head = try_grab_compound_head(head, refs, flags); @@ -2536,7 +2536,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, pages, nr); } - page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); head = try_grab_compound_head(pmd_page(orig), refs, flags); @@ -2570,7 +2570,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, pages, nr); } - page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); head = try_grab_compound_head(pud_page(orig), refs, flags); @@ -2599,7 +2599,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, BUILD_BUG_ON(pgd_devmap(orig)); - page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); head = try_grab_compound_head(pgd_page(orig), refs, flags); -- cgit v1.2.3 From 4c65422901154766e5cee17875ed680366a4a141 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 13:45:25 -0500 Subject: mm/gup: Remove an assumption of a contiguous memmap This assumption needs the inverse of nth_page(), which is temporarily named page_nth() until it's renamed later in this series. 
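Written out, the relationship the new helper is meant to satisfy (illustrative only):

	/* For a head page and any 0 <= n < compound_nr(head):
	 *
	 *	page_nth(head, nth_page(head, n)) == n
	 *
	 * whether the memmap is contiguous (simple pointer difference) or
	 * not (pfn arithmetic).
	 */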
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 2 ++ mm/gup.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'mm/gup.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0201d258c646..e3f8755f65ed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -212,8 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) +#define page_nth(head, tail) (page_to_pfn(tail) - page_to_pfn(head)) #else #define nth_page(page,n) ((page) + (n)) +#define page_nth(head, tail) ((tail) - (head)) #endif /* to align the pointer to the (next) page boundary */ diff --git a/mm/gup.c b/mm/gup.c index d585aa06afb2..ad120f470735 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -261,8 +261,8 @@ static inline struct page *compound_range_next(struct page *start, next = nth_page(start, i); page = compound_head(next); if (PageHead(page)) - nr = min_t(unsigned int, - page + compound_nr(page) - next, npages - i); + nr = min_t(unsigned int, npages - i, + compound_nr(page) - page_nth(page, next)); *ntails = nr; return page; -- cgit v1.2.3 From 59409373f60a0a493fe2a1b85dc8c6299c4fef37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 14:04:55 -0500 Subject: mm/gup: Handle page split race more efficiently If we hit the page split race, the current code returns NULL which will presumably trigger a retry under the mmap_lock. This isn't necessary; we can just retry the compound_head() lookup. This is a very minor optimisation of an unlikely path, but conceptually it matches (eg) the page cache RCU-protected lookup. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index ad120f470735..d3e8266d8e70 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -68,7 +68,10 @@ static void put_page_refs(struct page *page, int refs) */ static inline struct page *try_get_compound_head(struct page *page, int refs) { - struct page *head = compound_head(page); + struct page *head; + +retry: + head = compound_head(page); if (WARN_ON_ONCE(page_ref_count(head) < 0)) return NULL; @@ -86,7 +89,7 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) */ if (unlikely(compound_head(page) != head)) { put_page_refs(head, refs); - return NULL; + goto retry; } return head; -- cgit v1.2.3 From 78d9d6ced31ad2f242e44bd24b774fd99c2d663d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 14:15:11 -0500 Subject: mm/gup: Remove hpage_pincount_add() It's clearer to call atomic_add() in the callers; the assertions clearly can't fire there because they're part of the condition for calling atomic_add(). 
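A before/after sketch of the call site in try_grab_compound_head(), condensed from the diff below, to make the point about the assertions explicit:

	/* Before: the helper re-asserted what the caller had just checked. */
	if (hpage_pincount_available(page))
		hpage_pincount_add(page, refs);	/* VM_BUG_ON(!hpage_pincount_available(page)) inside */

	/* After: the caller's condition is the only check needed. */
	if (hpage_pincount_available(page))
		atomic_add(refs, compound_pincount_ptr(page));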
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig --- mm/gup.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index d3e8266d8e70..299b78736a4a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -29,14 +29,6 @@ struct follow_page_context { unsigned int page_mask; }; -static void hpage_pincount_add(struct page *page, int refs) -{ - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); - VM_BUG_ON_PAGE(page != compound_head(page), page); - - atomic_add(refs, compound_pincount_ptr(page)); -} - static void hpage_pincount_sub(struct page *page, int refs) { VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); @@ -151,17 +143,17 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, return NULL; /* - * When pinning a compound page of order > 1 (which is what - * hpage_pincount_available() checks for), use an exact count to - * track it, via hpage_pincount_add/_sub(). + * When pinning a compound page of order > 1 (which is + * what hpage_pincount_available() checks for), use an + * exact count to track it. * - * However, be sure to *also* increment the normal page refcount - * field at least once, so that the page really is pinned. - * That's why the refcount from the earlier + * However, be sure to *also* increment the normal page + * refcount field at least once, so that the page really + * is pinned. That's why the refcount from the earlier * try_get_compound_head() is left intact. */ if (hpage_pincount_available(page)) - hpage_pincount_add(page, refs); + atomic_add(refs, compound_pincount_ptr(page)); else page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); @@ -222,14 +214,13 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) return false; /* - * Similar to try_grab_compound_head(): even if using the - * hpage_pincount_add/_sub() routines, be sure to - * *also* increment the normal page refcount field at least - * once, so that the page really is pinned. + * Similar to try_grab_compound_head(): be sure to *also* + * increment the normal page refcount field at least once, + * so that the page really is pinned. */ if (hpage_pincount_available(page)) { page_ref_add(page, 1); - hpage_pincount_add(page, 1); + atomic_add(1, compound_pincount_ptr(page)); } else { page_ref_add(page, GUP_PIN_COUNTING_BIAS); } -- cgit v1.2.3 From 6315d8a23ce308433cf615e435ca2ee2aee7d11c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 Jan 2022 14:19:39 -0500 Subject: mm/gup: Remove hpage_pincount_sub() Move the assertion (and correct it to be a cheaper variant), and inline the atomic_sub() operation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 299b78736a4a..1809dc037a8e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -29,14 +29,6 @@ struct follow_page_context { unsigned int page_mask; }; -static void hpage_pincount_sub(struct page *page, int refs) -{ - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); - VM_BUG_ON_PAGE(page != compound_head(page), page); - - atomic_sub(refs, compound_pincount_ptr(page)); -} - /* Equivalent to calling put_page() @refs times. 
*/ static void put_page_refs(struct page *page, int refs) { @@ -169,12 +161,13 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, static void put_compound_head(struct page *page, int refs, unsigned int flags) { + VM_BUG_ON_PAGE(PageTail(page), page); + if (flags & FOLL_PIN) { mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, refs); - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, refs); + atomic_sub(refs, compound_pincount_ptr(page)); else refs *= GUP_PIN_COUNTING_BIAS; } -- cgit v1.2.3 From 5232c63f46fdd779303527ec36c518cc1e9c6b4e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 Jan 2022 16:46:43 -0500 Subject: mm: Make compound_pincount always available Move compound_pincount from the third page to the second page, which means it's available for all compound pages. That lets us delete hpage_pincount_available(). On 32-bit systems, there isn't enough space for both compound_pincount and compound_nr in the second page (it would collide with page->private, which is in use for pages in the swap cache), so revert the optimisation of storing both compound_order and compound_nr on 32-bit systems. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- Documentation/core-api/pin_user_pages.rst | 18 +++++++++--------- include/linux/mm.h | 21 ++++++++------------- include/linux/mm_types.h | 7 +++++-- mm/debug.c | 14 ++++---------- mm/gup.c | 20 +++++++++----------- mm/hugetlb.c | 4 ++++ mm/page_alloc.c | 3 +-- mm/rmap.c | 6 ++---- 8 files changed, 42 insertions(+), 51 deletions(-) (limited to 'mm/gup.c') diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst index fcf605be43d0..b18416f4500f 100644 --- a/Documentation/core-api/pin_user_pages.rst +++ b/Documentation/core-api/pin_user_pages.rst @@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct pages* array, and the function then pins pages by incrementing each by a special value: GUP_PIN_COUNTING_BIAS. -For huge pages (and in fact, any compound page of more than 2 pages), the -GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting -is achieved, by using the 3rd struct page in the compound page. A new struct -page field, hpage_pinned_refcount, has been added in order to support this. +For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead, +an exact form of pin counting is achieved, by using the 2nd struct page +in the compound page. A new struct page field, compound_pincount, has +been added in order to support this. This approach for compound pages avoids the counting upper limit problems that are discussed below. Those limitations would have been aggravated severely by huge pages, because each tail page adds a refcount to the head page. And in -fact, testing revealed that, without a separate hpage_pinned_refcount field, +fact, testing revealed that, without a separate compound_pincount field, page overflows were seen in some huge page stress tests. -This also means that huge pages and compound pages (of order > 1) do not suffer +This also means that huge pages and compound pages do not suffer from the false positives problem that is mentioned below.:: Function @@ -264,9 +264,9 @@ place.) 
Other diagnostics ================= -dump_page() has been enhanced slightly, to handle these new counting fields, and -to better report on compound pages in general. Specifically, for compound pages -with order > 1, the exact (hpage_pinned_refcount) pincount is reported. +dump_page() has been enhanced slightly, to handle these new counting +fields, and to better report on compound pages in general. Specifically, +for compound pages, the exact (compound_pincount) pincount is reported. References ========== diff --git a/include/linux/mm.h b/include/linux/mm.h index e3f8755f65ed..c64bd0b67d75 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -887,17 +887,6 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline bool hpage_pincount_available(struct page *page) -{ - /* - * Can the page->hpage_pinned_refcount field be used? That field is in - * the 3rd page of the compound page, so the smallest (2-page) compound - * pages cannot support it. - */ - page = compound_head(page); - return PageCompound(page) && compound_order(page) > 1; -} - static inline int head_compound_pincount(struct page *head) { return atomic_read(compound_pincount_ptr(head)); @@ -905,7 +894,7 @@ static inline int head_compound_pincount(struct page *head) static inline int compound_pincount(struct page *page) { - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); + VM_BUG_ON_PAGE(!PageCompound(page), page); page = compound_head(page); return head_compound_pincount(page); } @@ -913,7 +902,9 @@ static inline int compound_pincount(struct page *page) static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; +#ifdef CONFIG_64BIT page[1].compound_nr = 1U << order; +#endif } /* Returns the number of pages in this potentially compound page. */ @@ -921,7 +912,11 @@ static inline unsigned long compound_nr(struct page *page) { if (!PageHead(page)) return 1; +#ifdef CONFIG_64BIT return page[1].compound_nr; +#else + return 1UL << compound_order(page); +#endif } /* Returns the number of bytes in this potentially compound page. 
*/ @@ -1269,7 +1264,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); */ static inline bool page_maybe_dma_pinned(struct page *page) { - if (hpage_pincount_available(page)) + if (PageCompound(page)) return compound_pincount(page) > 0; /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 475bdb282769..0e274c9b934e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -135,11 +135,14 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + atomic_t compound_pincount; +#ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ +#endif }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ - atomic_t hpage_pinned_refcount; + unsigned long _compound_pad_2; /* For both global and memcg */ struct list_head deferred_list; }; @@ -300,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) static inline atomic_t *compound_pincount_ptr(struct page *page) { - return &page[2].hpage_pinned_refcount; + return &page[1].compound_pincount; } /* diff --git a/mm/debug.c b/mm/debug.c index bc9ac87f0e08..c4cf44266430 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -92,16 +92,10 @@ static void __dump_page(struct page *page) page, page_ref_count(head), mapcount, mapping, page_to_pgoff(page), page_to_pfn(page)); if (compound) { - if (hpage_pincount_available(page)) { - pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", - head, compound_order(head), - head_compound_mapcount(head), - head_compound_pincount(head)); - } else { - pr_warn("head:%p order:%u compound_mapcount:%d\n", - head, compound_order(head), - head_compound_mapcount(head)); - } + pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", + head, compound_order(head), + head_compound_mapcount(head), + head_compound_pincount(head)); } #ifdef CONFIG_MEMCG diff --git a/mm/gup.c b/mm/gup.c index 1809dc037a8e..56b6b01a430b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -99,12 +99,11 @@ retry: * * FOLL_GET: page's refcount will be incremented by @refs. * - * FOLL_PIN on compound pages that are > two pages long: page's refcount will - * be incremented by @refs, and page[2].hpage_pinned_refcount will be - * incremented by @refs * GUP_PIN_COUNTING_BIAS. + * FOLL_PIN on compound pages: page's refcount will be incremented by + * @refs, and page[1].compound_pincount will be incremented by @refs. * - * FOLL_PIN on normal pages, or compound pages that are two pages long: - * page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS. + * FOLL_PIN on normal pages: page's refcount will be incremented by + * @refs * GUP_PIN_COUNTING_BIAS. * * Return: head page (with refcount appropriately incremented) for success, or * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's @@ -135,16 +134,15 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, return NULL; /* - * When pinning a compound page of order > 1 (which is - * what hpage_pincount_available() checks for), use an - * exact count to track it. + * When pinning a compound page, use an exact count to + * track it. * * However, be sure to *also* increment the normal page * refcount field at least once, so that the page really * is pinned. That's why the refcount from the earlier * try_get_compound_head() is left intact. 
*/ - if (hpage_pincount_available(page)) + if (PageHead(page)) atomic_add(refs, compound_pincount_ptr(page)); else page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); @@ -166,7 +164,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) if (flags & FOLL_PIN) { mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, refs); - if (hpage_pincount_available(page)) + if (PageHead(page)) atomic_sub(refs, compound_pincount_ptr(page)); else refs *= GUP_PIN_COUNTING_BIAS; @@ -211,7 +209,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) * increment the normal page refcount field at least once, * so that the page really is pinned. */ - if (hpage_pincount_available(page)) { + if (PageHead(page)) { page_ref_add(page, 1); atomic_add(1, compound_pincount_ptr(page)); } else { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 43fb3155298e..785d6e340292 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1320,7 +1320,9 @@ static void __destroy_compound_gigantic_page(struct page *page, } set_compound_order(page, 0); +#ifdef CONFIG_64BIT page[1].compound_nr = 0; +#endif __ClearPageHead(page); } @@ -1812,7 +1814,9 @@ out_error: for (; j < nr_pages; j++, p = mem_map_next(p, page, j)) __ClearPageReserved(p); set_compound_order(page, 0); +#ifdef CONFIG_64BIT page[1].compound_nr = 0; +#endif __ClearPageHead(page); return false; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3589febc6d31..02283598fd14 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -734,8 +734,7 @@ static void prep_compound_head(struct page *page, unsigned int order) set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); - if (hpage_pincount_available(page)) - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(compound_pincount_ptr(page), 0); } static void prep_compound_tail(struct page *head, int tail_idx) diff --git a/mm/rmap.c b/mm/rmap.c index c7921c102bc0..1a13d5d6cfc7 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1219,8 +1219,7 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* increment count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); - if (hpage_pincount_available(page)) - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(compound_pincount_ptr(page), 0); __mod_lruvec_page_state(page, NR_ANON_THPS, nr); } else { @@ -2353,8 +2352,7 @@ void hugepage_add_new_anon_rmap(struct page *page, { BUG_ON(address < vma->vm_start || address >= vma->vm_end); atomic_set(compound_mapcount_ptr(page), 0); - if (hpage_pincount_available(page)) - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(compound_pincount_ptr(page), 0); __page_set_anon_rmap(page, vma, address, 1); } -- cgit v1.2.3 From ece1ed7bfa1208b527b3dc90bb45c55e0d139a88 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 4 Feb 2022 10:27:40 -0500 Subject: mm/gup: Add try_get_folio() and try_grab_folio() Convert try_get_compound_head() into try_get_folio() and convert try_grab_compound_head() into try_grab_folio(). Add a temporary try_grab_compound_head() wrapper around try_grab_folio() to let us convert callers individually. 
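As a reminder of the accounting try_grab_folio() implements (numbers are illustrative and assume GUP_PIN_COUNTING_BIAS keeps its current value of 1024):

	/* try_grab_folio(page, refs, FOLL_GET):
	 *	folio refcount    += refs
	 *
	 * try_grab_folio(page, refs, FOLL_PIN) on a single-page folio:
	 *	folio refcount    += refs * GUP_PIN_COUNTING_BIAS	(3 pins -> +3072)
	 *
	 * try_grab_folio(page, refs, FOLL_PIN) on a large folio:
	 *	folio refcount    += refs
	 *	compound_pincount += refs
	 */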
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 99 +++++++++++++++++++++++++++++------------------------------ mm/internal.h | 5 +++ 2 files changed, 54 insertions(+), 50 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 56b6b01a430b..11eba8e812aa 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -47,75 +47,70 @@ static void put_page_refs(struct page *page, int refs) } /* - * Return the compound head page with ref appropriately incremented, + * Return the folio with ref appropriately incremented, * or NULL if that failed. */ -static inline struct page *try_get_compound_head(struct page *page, int refs) +static inline struct folio *try_get_folio(struct page *page, int refs) { - struct page *head; + struct folio *folio; retry: - head = compound_head(page); - - if (WARN_ON_ONCE(page_ref_count(head) < 0)) + folio = page_folio(page); + if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; - if (unlikely(!page_cache_add_speculative(head, refs))) + if (unlikely(!folio_ref_try_add_rcu(folio, refs))) return NULL; /* - * At this point we have a stable reference to the head page; but it - * could be that between the compound_head() lookup and the refcount - * increment, the compound page was split, in which case we'd end up - * holding a reference on a page that has nothing to do with the page + * At this point we have a stable reference to the folio; but it + * could be that between calling page_folio() and the refcount + * increment, the folio was split, in which case we'd end up + * holding a reference on a folio that has nothing to do with the page * we were given anymore. - * So now that the head page is stable, recheck that the pages still - * belong together. + * So now that the folio is stable, recheck that the page still + * belongs to this folio. */ - if (unlikely(compound_head(page) != head)) { - put_page_refs(head, refs); + if (unlikely(page_folio(page) != folio)) { + folio_put_refs(folio, refs); goto retry; } - return head; + return folio; } /** - * try_grab_compound_head() - attempt to elevate a page's refcount, by a - * flags-dependent amount. - * - * Even though the name includes "compound_head", this function is still - * appropriate for callers that have a non-compound @page to get. - * + * try_grab_folio() - Attempt to get or pin a folio. * @page: pointer to page to be grabbed - * @refs: the value to (effectively) add to the page's refcount + * @refs: the value to (effectively) add to the folio's refcount * @flags: gup flags: these are the FOLL_* flag values. * * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the * same time. (That's true throughout the get_user_pages*() and * pin_user_pages*() APIs.) Cases: * - * FOLL_GET: page's refcount will be incremented by @refs. + * FOLL_GET: folio's refcount will be incremented by @refs. * - * FOLL_PIN on compound pages: page's refcount will be incremented by - * @refs, and page[1].compound_pincount will be incremented by @refs. + * FOLL_PIN on large folios: folio's refcount will be incremented by + * @refs, and its compound_pincount will be incremented by @refs. 
* - * FOLL_PIN on normal pages: page's refcount will be incremented by + * FOLL_PIN on single-page folios: folio's refcount will be incremented by * @refs * GUP_PIN_COUNTING_BIAS. * - * Return: head page (with refcount appropriately incremented) for success, or - * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's - * considered failure, and furthermore, a likely bug in the caller, so a warning - * is also emitted. + * Return: The folio containing @page (with refcount appropriately + * incremented) for success, or NULL upon failure. If neither FOLL_GET + * nor FOLL_PIN was set, that's considered failure, and furthermore, + * a likely bug in the caller, so a warning is also emitted. */ -__maybe_unused struct page *try_grab_compound_head(struct page *page, - int refs, unsigned int flags) +struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) { if (flags & FOLL_GET) - return try_get_compound_head(page, refs); + return try_get_folio(page, refs); else if (flags & FOLL_PIN) { + struct folio *folio; + /* * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a * right zone, so fail and let the caller fall back to the slow @@ -129,34 +124,38 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, * CAUTION: Don't use compound_head() on the page before this * point, the result won't be stable. */ - page = try_get_compound_head(page, refs); - if (!page) + folio = try_get_folio(page, refs); + if (!folio) return NULL; /* - * When pinning a compound page, use an exact count to - * track it. + * When pinning a large folio, use an exact count to track it. * - * However, be sure to *also* increment the normal page - * refcount field at least once, so that the page really + * However, be sure to *also* increment the normal folio + * refcount field at least once, so that the folio really * is pinned. That's why the refcount from the earlier - * try_get_compound_head() is left intact. + * try_get_folio() is left intact. */ - if (PageHead(page)) - atomic_add(refs, compound_pincount_ptr(page)); + if (folio_test_large(folio)) + atomic_add(refs, folio_pincount_ptr(folio)); else - page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); + folio_ref_add(folio, + refs * (GUP_PIN_COUNTING_BIAS - 1)); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, - refs); - - return page; + return folio; } WARN_ON_ONCE(1); return NULL; } +struct page *try_grab_compound_head(struct page *page, + int refs, unsigned int flags) +{ + return &try_grab_folio(page, refs, flags)->page; +} + static void put_compound_head(struct page *page, int refs, unsigned int flags) { VM_BUG_ON_PAGE(PageTail(page), page); @@ -185,7 +184,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) * @flags: gup flags: these are the FOLL_* flag values. * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same - * time. Cases: please see the try_grab_compound_head() documentation, with + * time. Cases: please see the try_grab_folio() documentation, with * "refs=1". 
* * Return: true for success, or if no action was required (if neither FOLL_PIN diff --git a/mm/internal.h b/mm/internal.h index 3756dd5d2c92..98b97cb5a97b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -737,4 +737,9 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, void free_zone_device_page(struct page *page); +/* + * mm/gup.c + */ +struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); + #endif /* __MM_INTERNAL_H */ -- cgit v1.2.3 From 5fec0719908bdabdf9d017b0f488d18019ed00f7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 4 Feb 2022 10:32:01 -0500 Subject: mm/gup: Convert try_grab_page() to use a folio Hoist the folio conversion and the folio_ref_count() check to the top of the function instead of using the one buried in try_get_page(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig --- mm/gup.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 11eba8e812aa..81eb87604a70 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -174,15 +174,14 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) /** * try_grab_page() - elevate a page's refcount by a flag-dependent amount + * @page: pointer to page to be grabbed + * @flags: gup flags: these are the FOLL_* flag values. * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. * - * @page: pointer to page to be grabbed - * @flags: gup flags: these are the FOLL_* flag values. - * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same * time. Cases: please see the try_grab_folio() documentation, with * "refs=1". @@ -193,29 +192,28 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) */ bool __must_check try_grab_page(struct page *page, unsigned int flags) { + struct folio *folio = page_folio(page); + WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN)); + if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) + return false; if (flags & FOLL_GET) - return try_get_page(page); + folio_ref_inc(folio); else if (flags & FOLL_PIN) { - page = compound_head(page); - - if (WARN_ON_ONCE(page_ref_count(page) <= 0)) - return false; - /* - * Similar to try_grab_compound_head(): be sure to *also* + * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. */ - if (PageHead(page)) { - page_ref_add(page, 1); - atomic_add(1, compound_pincount_ptr(page)); + if (folio_test_large(folio)) { + folio_ref_add(folio, 1); + atomic_add(1, folio_pincount_ptr(folio)); } else { - page_ref_add(page, GUP_PIN_COUNTING_BIAS); + folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); } return true; -- cgit v1.2.3 From d8ddc099c6b3dde887f9484da9a6677709d68b61 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 10 Dec 2021 15:39:04 -0500 Subject: mm/gup: Add gup_put_folio() Convert put_compound_head() to gup_put_folio() and hpage_pincount_sub() to folio_pincount_sub(). This removes the last call to put_page_refs(), so delete it. Add a temporary put_compound_head() wrapper which will be deleted by the end of this series. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 81eb87604a70..cbbddcf8ff3f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -29,23 +29,6 @@ struct follow_page_context { unsigned int page_mask; }; -/* Equivalent to calling put_page() @refs times. */ -static void put_page_refs(struct page *page, int refs) -{ -#ifdef CONFIG_DEBUG_VM - if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page)) - return; -#endif - - /* - * Calling put_page() for each ref is unnecessarily slow. Only the last - * ref needs a put_page(). - */ - if (refs > 1) - page_ref_sub(page, refs - 1); - put_page(page); -} - /* * Return the folio with ref appropriately incremented, * or NULL if that failed. @@ -156,20 +139,23 @@ struct page *try_grab_compound_head(struct page *page, return &try_grab_folio(page, refs, flags)->page; } -static void put_compound_head(struct page *page, int refs, unsigned int flags) +static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { - VM_BUG_ON_PAGE(PageTail(page), page); - if (flags & FOLL_PIN) { - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, - refs); - if (PageHead(page)) - atomic_sub(refs, compound_pincount_ptr(page)); + node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); + if (folio_test_large(folio)) + atomic_sub(refs, folio_pincount_ptr(folio)); else refs *= GUP_PIN_COUNTING_BIAS; } - put_page_refs(page, refs); + folio_put_refs(folio, refs); +} + +static void put_compound_head(struct page *page, int refs, unsigned int flags) +{ + VM_BUG_ON_PAGE(PageTail(page), page); + gup_put_folio((struct folio *)page, refs, flags); } /** @@ -230,7 +216,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) */ void unpin_user_page(struct page *page) { - put_compound_head(compound_head(page), 1, FOLL_PIN); + gup_put_folio(page_folio(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page); -- cgit v1.2.3 From 822951d84684d7a0c4f45e7231c960e7fe786d8f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 Jan 2022 00:15:04 -0500 Subject: mm/hugetlb: Use try_grab_folio() instead of try_grab_compound_head() follow_hugetlb_page() only cares about success or failure, so it doesn't need to know the type of the returned pointer, only whether it's NULL or not. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 3 --- mm/gup.c | 2 +- mm/hugetlb.c | 7 +++---- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'mm/gup.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index b764057022c8..dca5c99395c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1124,9 +1124,6 @@ static inline void get_page(struct page *page) } bool __must_check try_grab_page(struct page *page, unsigned int flags); -struct page *try_grab_compound_head(struct page *page, int refs, - unsigned int flags); - static inline __must_check bool try_get_page(struct page *page) { diff --git a/mm/gup.c b/mm/gup.c index cbbddcf8ff3f..014004102e26 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -133,7 +133,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) return NULL; } -struct page *try_grab_compound_head(struct page *page, +static inline struct page *try_grab_compound_head(struct page *page, int refs, unsigned int flags) { return &try_grab_folio(page, refs, flags)->page; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 785d6e340292..10203f3b1ccf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6076,7 +6076,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, if (pages) { /* - * try_grab_compound_head() should always succeed here, + * try_grab_folio() should always succeed here, * because: a) we hold the ptl lock, and b) we've just * checked that the huge page is present in the page * tables. If the huge page is present, then the tail @@ -6085,9 +6085,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * any way. So this page must be available at this * point, unless the page refcount overflowed: */ - if (WARN_ON_ONCE(!try_grab_compound_head(pages[i], - refs, - flags))) { + if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs, + flags))) { spin_unlock(ptl); remainder = 0; err = -ENOMEM; -- cgit v1.2.3 From b0496fe4effd83ef76c7440befb184f922b3ffbb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 10 Dec 2021 15:54:11 -0500 Subject: mm/gup: Convert gup_pte_range() to use a folio We still call try_grab_folio() once per PTE; a future patch could optimise to just adjust the reference count for each page within the folio. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 014004102e26..94e0e2a130fc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2232,7 +2232,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = ptep_get_lockless(ptep); - struct page *head, *page; + struct page *page; + struct folio *folio; /* * Similar to the PMD case below, NUMA hinting must take slow @@ -2259,22 +2260,20 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - head = try_grab_compound_head(page, 1, flags); - if (!head) + folio = try_grab_folio(page, 1, flags); + if (!folio) goto pte_unmap; if (unlikely(page_is_secretmem(page))) { - put_compound_head(head, 1, flags); + gup_put_folio(folio, 1, flags); goto pte_unmap; } if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_compound_head(head, 1, flags); + gup_put_folio(folio, 1, flags); goto pte_unmap; } - VM_BUG_ON_PAGE(compound_head(page) != head, page); - /* * We need to make the page accessible if and only if we are * going to access its content (the FOLL_PIN case). Please @@ -2284,14 +2283,13 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { - unpin_user_page(page); + gup_put_folio(folio, 1, flags); goto pte_unmap; } } - SetPageReferenced(page); + folio_set_referenced(folio); pages[*nr] = page; (*nr)++; - } while (ptep++, addr += PAGE_SIZE, addr != end); ret = 1; -- cgit v1.2.3 From 09a1626effb89dddcde10c10f5e3c5e6f8b94136 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 22 Dec 2021 16:38:30 -0500 Subject: mm/gup: Convert gup_hugepte() to use a folio There should be little to no effect from this patch; just removing uses of some old APIs. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 94e0e2a130fc..bd5b49a1bc13 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2425,7 +2425,8 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, struct page **pages, int *nr) { unsigned long pte_end; - struct page *head, *page; + struct page *page; + struct folio *folio; pte_t pte; int refs; @@ -2441,21 +2442,20 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - head = pte_page(pte); - page = nth_page(head, (addr & (sz - 1)) >> PAGE_SHIFT); + page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(head, refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } -- cgit v1.2.3 From 667ed1f7bb3b1c1ec2512e64cec04a07df7c5068 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 22 Dec 2021 16:57:23 -0500 Subject: mm/gup: Convert gup_huge_pmd() to use a folio Use the new folio-based APIs. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index bd5b49a1bc13..15de59333c3d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2489,7 +2489,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { - struct page *head, *page; + struct page *page; + struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) @@ -2505,17 +2506,17 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pmd_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } -- cgit v1.2.3 From 83afb52e47d5e31c7d58c07a6d31c43b5ef421a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 22 Dec 2021 18:07:47 -0500 Subject: mm/gup: Convert gup_huge_pud() to use a folio Use the new folio-based APIs. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 15de59333c3d..b6ca2364af8a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2524,7 +2524,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { - struct page *head, *page; + struct page *page; + struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) @@ -2540,17 +2541,17 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pud_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } -- cgit v1.2.3 From 2d7919a29275dbb9bc3b6e6b4ea015a1eefc463f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 22 Dec 2021 22:30:29 -0500 Subject: mm/gup: Convert gup_huge_pgd() to use a folio Use the new folio-based APIs. This was the last user of try_grab_compound_head(), so remove it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index b6ca2364af8a..23b83bc16e45 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -133,12 +133,6 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) return NULL; } -static inline struct page *try_grab_compound_head(struct page *page, - int refs, unsigned int flags) -{ - return &try_grab_folio(page, refs, flags)->page; -} - static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { @@ -2560,7 +2554,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, struct page **pages, int *nr) { int refs; - struct page *head, *page; + struct page *page; + struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; @@ -2570,17 +2565,17 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pgd_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } -- cgit v1.2.3 From 12521c7606b2037f8ac2a2fab19e955444a549cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 22 Dec 2021 23:43:16 -0500 Subject: mm/gup: Turn compound_next() into gup_folio_next() Convert both callers to work on folios instead of pages. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 23b83bc16e45..0bde28f0543f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -230,20 +230,19 @@ static inline struct page *compound_range_next(struct page *start, return page; } -static inline struct page *compound_next(struct page **list, +static inline struct folio *gup_folio_next(struct page **list, unsigned long npages, unsigned long i, unsigned int *ntails) { - struct page *page; + struct folio *folio = page_folio(list[i]); unsigned int nr; - page = compound_head(list[i]); for (nr = i + 1; nr < npages; nr++) { - if (compound_head(list[nr]) != page) + if (page_folio(list[nr]) != folio) break; } *ntails = nr - i; - return page; + return folio; } /** @@ -271,17 +270,17 @@ static inline struct page *compound_next(struct page **list, void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; if (!make_dirty) { unpin_user_pages(pages, npages); return; } - for (index = 0; index < npages; index += ntails) { - head = compound_next(pages, npages, index, &ntails); + for (i = 0; i < npages; i += nr) { + folio = gup_folio_next(pages, npages, i, &nr); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key @@ -302,9 +301,12 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ - if (!PageDirty(head)) - set_page_dirty_lock(head); - put_compound_head(head, ntails, FOLL_PIN); + if (!folio_test_dirty(folio)) { + folio_lock(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + } + gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages_dirty_lock); @@ -357,9 +359,9 @@ EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); */ void unpin_user_pages(struct page **pages, unsigned long npages) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by @@ -369,9 +371,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages) if (WARN_ON(IS_ERR_VALUE(npages))) return; - for (index = 0; index < npages; index += ntails) { - head = compound_next(pages, npages, index, &ntails); - put_compound_head(head, ntails, FOLL_PIN); + for (i = 0; i < npages; i += nr) { + folio = gup_folio_next(pages, npages, i, &nr); + gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages); -- cgit v1.2.3 From 659508f9c936aa6e3aaf6e9cf6a4a8836b8f8355 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 23 Dec 2021 10:20:12 -0500 Subject: mm/gup: Turn compound_range_next() into gup_folio_range_next() Convert the only caller to work on folios instead of pages. This removes the last caller of put_compound_head(), so delete it. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 4 ++-- mm/gup.c | 38 +++++++++++++++++--------------------- 2 files changed, 19 insertions(+), 23 deletions(-) (limited to 'mm/gup.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index dca5c99395c9..0d3f9057a807 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -212,10 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -#define page_nth(head, tail) (page_to_pfn(tail) - page_to_pfn(head)) +#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) -#define page_nth(head, tail) ((tail) - (head)) +#define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ diff --git a/mm/gup.c b/mm/gup.c index 0bde28f0543f..5edd05df9c37 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -146,12 +146,6 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) folio_put_refs(folio, refs); } -static void put_compound_head(struct page *page, int refs, unsigned int flags) -{ - VM_BUG_ON_PAGE(PageTail(page), page); - gup_put_folio((struct folio *)page, refs, flags); -} - /** * try_grab_page() - elevate a page's refcount by a flag-dependent amount * @page: pointer to page to be grabbed @@ -214,20 +208,19 @@ void unpin_user_page(struct page *page) } EXPORT_SYMBOL(unpin_user_page); -static inline struct page *compound_range_next(struct page *start, +static inline struct folio *gup_folio_range_next(struct page *start, unsigned long npages, unsigned long i, unsigned int *ntails) { - struct page *next, *page; + struct page *next = nth_page(start, i); + struct folio *folio = page_folio(next); unsigned int nr = 1; - next = nth_page(start, i); - page = compound_head(next); - if (PageHead(page)) + if (folio_test_large(folio)) nr = min_t(unsigned int, npages - i, - compound_nr(page) - page_nth(page, next)); + folio_nr_pages(folio) - folio_page_idx(folio, next)); *ntails = nr; - return page; + return folio; } static inline struct folio *gup_folio_next(struct page **list, @@ -335,15 +328,18 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock); void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; - for (index = 0; index < npages; index += ntails) { - head = compound_range_next(page, npages, index, &ntails); - if (make_dirty && !PageDirty(head)) - set_page_dirty_lock(head); - put_compound_head(head, ntails, FOLL_PIN); + for (i = 0; i < npages; i += nr) { + folio = gup_folio_range_next(page, npages, i, &nr); + if (make_dirty && !folio_test_dirty(folio)) { + folio_lock(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + } + gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); -- cgit v1.2.3 From 1b7f7e58decccb52d6bc454413e3298f1ab3a9c6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 17 Feb 2022 12:46:35 -0500 Subject: mm/gup: Convert check_and_migrate_movable_pages() to use a folio Switch from head pages to folios. This removes an assumption that THPs are the only way to have a high-order page. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- mm/gup.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'mm/gup.c') diff --git a/mm/gup.c b/mm/gup.c index 5edd05df9c37..35d550dde7ff 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1784,43 +1784,44 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, unsigned int gup_flags) { unsigned long isolation_error_count = 0, i; - struct page *prev_head = NULL; + struct folio *prev_folio = NULL; LIST_HEAD(movable_page_list); bool drain_allow = true; int ret = 0; for (i = 0; i < nr_pages; i++) { - struct page *head = compound_head(pages[i]); + struct folio *folio = page_folio(pages[i]); - if (head == prev_head) + if (folio == prev_folio) continue; - prev_head = head; + prev_folio = folio; - if (is_pinnable_page(head)) + if (folio_is_pinnable(folio)) continue; /* * Try to move out any movable page before pinning the range. */ - if (PageHuge(head)) { - if (!isolate_huge_page(head, &movable_page_list)) + if (folio_test_hugetlb(folio)) { + if (!isolate_huge_page(&folio->page, + &movable_page_list)) isolation_error_count++; continue; } - if (!PageLRU(head) && drain_allow) { + if (!folio_test_lru(folio) && drain_allow) { lru_add_drain_all(); drain_allow = false; } - if (isolate_lru_page(head)) { + if (folio_isolate_lru(folio)) { isolation_error_count++; continue; } - list_add_tail(&head->lru, &movable_page_list); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_lru(head), - thp_nr_pages(head)); + list_add_tail(&folio->lru, &movable_page_list); + node_stat_mod_folio(folio, + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); } if (!list_empty(&movable_page_list) || isolation_error_count) -- cgit v1.2.3
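The series above changes only the internals of the pin/unpin paths: callers keep handing in and getting back arrays of struct page, while the release and migration sides now walk those arrays one folio at a time. The sketch below is not part of the series; it is a minimal, hedged illustration of a caller's view, using only exported APIs visible in or contemporary with these patches (pin_user_pages(), unpin_user_pages_dirty_lock()). The function name pin_and_dirty_range() and its error handling are hypothetical. FOLL_LONGTERM is included because that flag is what routes the pin through check_and_migrate_movable_pages(), the function converted to folios in the final patch.

/*
 * Illustrative sketch only (not from the series): a caller pins a user
 * address range, uses it, and releases it.  The page-array interface is
 * unchanged by the folio conversion; after this series the unpin path
 * groups consecutive pages of the same folio and drops their pin
 * references in one gup_put_folio() call per folio.
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>

static int pin_and_dirty_range(unsigned long start, unsigned long nr_pages)
{
	struct page **pages;
	long nr_pinned;

	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	/* FOLL_LONGTERM makes GUP migrate unpinnable (e.g. CMA) pages first. */
	nr_pinned = pin_user_pages(start, nr_pages,
				   FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
	mmap_read_unlock(current->mm);

	if (nr_pinned > 0) {
		/* ... access the pinned pages (DMA, copying, etc.) ... */

		/*
		 * Release whatever was pinned; with make_dirty == true each
		 * folio is locked, marked dirty, and unpinned folio-by-folio.
		 */
		unpin_user_pages_dirty_lock(pages, nr_pinned, true);
	}

	kfree(pages);
	return nr_pinned < 0 ? nr_pinned : 0;
}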