Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 189
1 file changed, 93 insertions(+), 96 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c8a712282f..a256a241fd1d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -414,7 +414,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	},
 };
 
-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
 				unsigned long flags);
 
 struct queue_pages {
@@ -427,36 +427,36 @@ struct queue_pages {
 };
 
 /*
- * Check if the page's nid is in qp->nmask.
+ * Check if the folio's nid is in qp->nmask.
  *
  * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
  * in the invert of qp->nmask.
  */
-static inline bool queue_pages_required(struct page *page,
+static inline bool queue_folio_required(struct folio *folio,
 					struct queue_pages *qp)
 {
-	int nid = page_to_nid(page);
+	int nid = folio_nid(folio);
 	unsigned long flags = qp->flags;
 
 	return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
 }
 
 /*
- * queue_pages_pmd() has three possible return values:
- * 0 - pages are placed on the right node or queued successfully, or
+ * queue_folios_pmd() has three possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
  *     special page is met, i.e. huge zero page.
- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
  *     specified.
  * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
- *        existing page was already on a node that does not follow the
+ *        existing folio was already on a node that does not follow the
  *        policy.
  */
-static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 	__releases(ptl)
 {
 	int ret = 0;
-	struct page *page;
+	struct folio *folio;
 	struct queue_pages *qp = walk->private;
 	unsigned long flags;
 
@@ -464,19 +464,19 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 		ret = -EIO;
 		goto unlock;
 	}
-	page = pmd_page(*pmd);
-	if (is_huge_zero_page(page)) {
+	folio = pfn_folio(pmd_pfn(*pmd));
+	if (is_huge_zero_page(&folio->page)) {
 		walk->action = ACTION_CONTINUE;
 		goto unlock;
 	}
-	if (!queue_pages_required(page, qp))
+	if (!queue_folio_required(folio, qp))
 		goto unlock;
 
 	flags = qp->flags;
-	/* go to thp migration */
+	/* go to folio migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
 		if (!vma_migratable(walk->vma) ||
-		    migrate_page_add(page, qp->pagelist, flags)) {
+		    migrate_folio_add(folio, qp->pagelist, flags)) {
 			ret = 1;
 			goto unlock;
 		}
@@ -491,19 +491,19 @@ unlock:
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
  *
- * queue_pages_pte_range() has three possible return values:
- * 0 - pages are placed on the right node or queued successfully, or
+ * queue_folios_pte_range() has three possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
  *     special page is met, i.e. zero page.
- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
  *     specified.
- * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
  *        on a node that does not follow the policy.
  */
-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
 {
 	struct vm_area_struct *vma = walk->vma;
-	struct page *page;
+	struct folio *folio;
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = qp->flags;
 	bool has_unmovable = false;
@@ -512,7 +512,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl)
-		return queue_pages_pmd(pmd, ptl, addr, end, walk);
+		return queue_folios_pmd(pmd, ptl, addr, end, walk);
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
@@ -521,16 +521,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (!pte_present(*pte))
 			continue;
-		page = vm_normal_page(vma, addr, *pte);
-		if (!page || is_zone_device_page(page))
+		folio = vm_normal_folio(vma, addr, *pte);
+		if (!folio || folio_is_zone_device(folio))
 			continue;
 		/*
-		 * vm_normal_page() filters out zero pages, but there might
-		 * still be PageReserved pages to skip, perhaps in a VDSO.
+		 * vm_normal_folio() filters out zero pages, but there might
+		 * still be reserved folios to skip, perhaps in a VDSO.
 		 */
-		if (PageReserved(page))
+		if (folio_test_reserved(folio))
 			continue;
-		if (!queue_pages_required(page, qp))
+		if (!queue_folio_required(folio, qp))
 			continue;
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
 			/* MPOL_MF_STRICT must be specified if we get here */
@@ -544,7 +544,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			 * temporary off LRU pages in the range.  Still
 			 * need migrate other LRU pages.
 			 */
-			if (migrate_page_add(page, qp->pagelist, flags))
+			if (migrate_folio_add(folio, qp->pagelist, flags))
 				has_unmovable = true;
 		} else
 			break;
@@ -558,7 +558,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	return addr != end ? -EIO : 0;
 }
 
-static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
 			       unsigned long addr, unsigned long end,
 			       struct mm_walk *walk)
 {
@@ -566,7 +566,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 #ifdef CONFIG_HUGETLB_PAGE
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = (qp->flags & MPOL_MF_VALID);
-	struct page *page;
+	struct folio *folio;
 	spinlock_t *ptl;
 	pte_t entry;
 
@@ -574,13 +574,13 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 	entry = huge_ptep_get(pte);
 	if (!pte_present(entry))
 		goto unlock;
-	page = pte_page(entry);
-	if (!queue_pages_required(page, qp))
+	folio = pfn_folio(pte_pfn(entry));
+	if (!queue_folio_required(folio, qp))
 		goto unlock;
 
 	if (flags == MPOL_MF_STRICT) {
 		/*
-		 * STRICT alone means only detecting misplaced page and no
+		 * STRICT alone means only detecting misplaced folio and no
 		 * need to further check other vma.
 		 */
 		ret = -EIO;
@@ -591,20 +591,28 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 		/*
 		 * Must be STRICT with MOVE*, otherwise .test_walk() have
 		 * stopped walking current vma.
-		 * Detecting misplaced page but allow migrating pages which
+		 * Detecting misplaced folio but allow migrating folios which
 		 * have been queued.
 		 */
 		ret = 1;
 		goto unlock;
 	}
 
-	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
+	/*
+	 * With MPOL_MF_MOVE, we try to migrate only unshared folios. If it
+	 * is shared it is likely not worth migrating.
+	 *
+	 * To check if the folio is shared, ideally we want to make sure
+	 * every page is mapped to the same process. Doing that is very
+	 * expensive, so check the estimated mapcount of the folio instead.
+	 */
 	if (flags & (MPOL_MF_MOVE_ALL) ||
-	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
-		if (isolate_hugetlb(page, qp->pagelist) &&
+	    (flags & MPOL_MF_MOVE && folio_estimated_sharers(folio) == 1 &&
+	     !hugetlb_pmd_shared(pte))) {
+		if (!isolate_hugetlb(folio, qp->pagelist) &&
 		    (flags & MPOL_MF_STRICT))
 			/*
-			 * Failed to isolate page but allow migrating pages
+			 * Failed to isolate folio but allow migrating pages
 			 * which have been queued.
 			 */
 			ret = 1;
@@ -631,13 +639,12 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long addr, unsigned long end)
 {
 	struct mmu_gather tlb;
-	int nr_updated;
+	long nr_updated;
 
 	tlb_gather_mmu(&tlb, vma->vm_mm);
 
-	nr_updated = change_protection(&tlb, vma, addr, end, PAGE_NONE,
-				       MM_CP_PROT_NUMA);
-	if (nr_updated)
+	nr_updated = change_protection(&tlb, vma, addr, end, MM_CP_PROT_NUMA);
+	if (nr_updated > 0)
 		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
 
 	tlb_finish_mmu(&tlb);
@@ -703,8 +710,8 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 }
 
 static const struct mm_walk_ops queue_pages_walk_ops = {
-	.hugetlb_entry		= queue_pages_hugetlb,
-	.pmd_entry		= queue_pages_pte_range,
+	.hugetlb_entry		= queue_folios_hugetlb,
+	.pmd_entry		= queue_folios_pte_range,
 	.test_walk		= queue_pages_test_walk,
 };
 
@@ -787,24 +794,21 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 static int mbind_range(struct mm_struct *mm, unsigned long start,
 		       unsigned long end, struct mempolicy *new_pol)
 {
-	MA_STATE(mas, &mm->mm_mt, start, start);
+	VMA_ITERATOR(vmi, mm, start);
 	struct vm_area_struct *prev;
 	struct vm_area_struct *vma;
 	int err = 0;
 	pgoff_t pgoff;
 
-	prev = mas_prev(&mas, 0);
-	if (unlikely(!prev))
-		mas_set(&mas, start);
-
-	vma = mas_find(&mas, end - 1);
+	prev = vma_prev(&vmi);
+	vma = vma_find(&vmi, end);
 	if (WARN_ON(!vma))
 		return 0;
 
 	if (start > vma->vm_start)
 		prev = vma;
 
-	for (; vma; vma = mas_next(&mas, end - 1)) {
+	do {
 		unsigned long vmstart = max(start, vma->vm_start);
 		unsigned long vmend = min(end, vma->vm_end);
 
@@ -813,29 +817,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 		pgoff = vma->vm_pgoff +
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
-		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
+		prev = vma_merge(&vmi, mm, prev, vmstart, vmend, vma->vm_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
 				 new_pol, vma->vm_userfaultfd_ctx,
 				 anon_vma_name(vma));
 		if (prev) {
-			/* vma_merge() invalidated the mas */
-			mas_pause(&mas);
 			vma = prev;
 			goto replace;
 		}
 		if (vma->vm_start != vmstart) {
-			err = split_vma(vma->vm_mm, vma, vmstart, 1);
+			err = split_vma(&vmi, vma, vmstart, 1);
 			if (err)
 				goto out;
-			/* split_vma() invalidated the mas */
-			mas_pause(&mas);
 		}
 		if (vma->vm_end != vmend) {
-			err = split_vma(vma->vm_mm, vma, vmend, 0);
+			err = split_vma(&vmi, vma, vmend, 0);
 			if (err)
 				goto out;
-			/* split_vma() invalidated the mas */
-			mas_pause(&mas);
 		}
 replace:
 		err = vma_replace_policy(vma, new_pol);
@@ -843,7 +841,7 @@ replace:
 			goto out;
 next:
 		prev = vma;
-	}
+	} for_each_vma_range(vmi, vma, end);
 
 out:
 	return err;
@@ -1023,27 +1021,28 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 }
 
 #ifdef CONFIG_MIGRATION
-/*
- * page migration, thp tail pages can be passed.
- */
-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
 				unsigned long flags)
 {
-	struct page *head = compound_head(page);
 	/*
-	 * Avoid migrating a page that is shared with others.
+	 * We try to migrate only unshared folios. If it is shared it
+	 * is likely not worth migrating.
+	 *
+	 * To check if the folio is shared, ideally we want to make sure
+	 * every page is mapped to the same process. Doing that is very
+	 * expensive, so check the estimated mapcount of the folio instead.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) {
-		if (!isolate_lru_page(head)) {
-			list_add_tail(&head->lru, pagelist);
-			mod_node_page_state(page_pgdat(head),
-				NR_ISOLATED_ANON + page_is_file_lru(head),
-				thp_nr_pages(head));
+	if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
+		if (folio_isolate_lru(folio)) {
+			list_add_tail(&folio->lru, foliolist);
+			node_stat_mod_folio(folio,
+				NR_ISOLATED_ANON + folio_is_file_lru(folio),
+				folio_nr_pages(folio));
 		} else if (flags & MPOL_MF_STRICT) {
 			/*
-			 * Non-movable page may reach here.  And, there may be
-			 * temporary off LRU pages or non-LRU movable pages.
-			 * Treat them as unmovable pages since they can't be
+			 * Non-movable folio may reach here.  And, there may be
+			 * temporary off LRU folios or non-LRU movable folios.
+			 * Treat them as unmovable folios since they can't be
 			 * isolated, so they can't be moved at the moment.  It
 			 * should return -EIO for this case too.
 			 */
@@ -1219,9 +1218,11 @@ static struct page *new_page(struct page *page, unsigned long start)
 		break;
 	}
 
-	if (folio_test_hugetlb(src))
-		return alloc_huge_page_vma(page_hstate(&src->page),
+	if (folio_test_hugetlb(src)) {
+		dst = alloc_hugetlb_folio_vma(folio_hstate(src),
 				vma, address);
+		return &dst->page;
+	}
 
 	if (folio_test_large(src))
 		gfp = GFP_TRANSHUGE;
@@ -1235,7 +1236,7 @@ static struct page *new_page(struct page *page, unsigned long start)
 }
 #else
 
-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
 				unsigned long flags)
 {
 	return -EIO;
@@ -1489,7 +1490,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	struct mempolicy *new;
+	struct mempolicy *new, *old;
 	unsigned long vmstart;
 	unsigned long vmend;
 	unsigned long end;
@@ -1521,31 +1522,27 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 		return 0;
 	mmap_write_lock(mm);
 	for_each_vma_range(vmi, vma, end) {
-		vmstart = max(start, vma->vm_start);
-		vmend = min(end, vma->vm_end);
-		new = mpol_dup(vma_policy(vma));
-		if (IS_ERR(new)) {
-			err = PTR_ERR(new);
-			break;
-		}
-		/*
-		 * Only update home node if there is an existing vma policy
-		 */
-		if (!new)
-			continue;
-
 		/*
 		 * If any vma in the range got policy other than MPOL_BIND
 		 * or MPOL_PREFERRED_MANY we return error. We don't reset
 		 * the home node for vmas we already updated before.
 		 */
-		if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) {
-			mpol_put(new);
+		old = vma_policy(vma);
+		if (!old)
+			continue;
+		if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) {
			err = -EOPNOTSUPP;
 			break;
 		}
+		new = mpol_dup(old);
+		if (IS_ERR(new)) {
+			err = PTR_ERR(new);
+			break;
+		}
 		new->home_node = home_node;
+		vmstart = max(start, vma->vm_start);
+		vmend = min(end, vma->vm_end);
 		err = mbind_range(mm, vmstart, vmend, new);
 		mpol_put(new);
 		if (err)
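For context, the queue_folios_*() and migrate_folio_add() paths changed above are reached from userspace through mbind(2). The program below is not part of the patch; it is a minimal userspace sketch, assuming a NUMA-enabled kernel with node 0 online and libnuma's <numaif.h> installed (build with "cc demo.c -lnuma"), showing the kind of call that drives this code: fault in a private anonymous range, then bind it to node 0 with MPOL_MF_MOVE so already-resident folios get queued for migration, with MPOL_MF_STRICT turning unmovable or misplaced folios into an error as described in the comment blocks above.

/* Userspace illustration only; names below are standard mbind(2)/libnuma API. */
#include <numaif.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4 * 1024 * 1024;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(buf, 0, len);	/* fault the folios in before rebinding */

	/*
	 * Bind the range to node 0 and ask the kernel to migrate folios
	 * already faulted in; MPOL_MF_STRICT reports folios that cannot
	 * be placed on the target node instead of silently skipping them.
	 */
	unsigned long nodemask = 1UL << 0;
	if (mbind(buf, len, MPOL_BIND, &nodemask, 8 * sizeof(nodemask),
		  MPOL_MF_MOVE | MPOL_MF_STRICT) != 0)
		perror("mbind");
	else
		puts("range bound to node 0, resident folios migrated");

	munmap(buf, len);
	return 0;
}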