Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 81
1 file changed, 42 insertions(+), 39 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a4e5eaf3eb01..16be62d493cd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -365,9 +365,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (!(*vm_flags & VM_NO_KHUGEPAGED) &&
-				khugepaged_enter_vma_merge(vma, *vm_flags))
-			return -ENOMEM;
+		khugepaged_enter_vma(vma, *vm_flags);
 		break;
 	case MADV_NOHUGEPAGE:
 		*vm_flags &= ~VM_HUGEPAGE;
@@ -439,12 +437,19 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
-static bool hugepage_vma_check(struct vm_area_struct *vma,
-			       unsigned long vm_flags)
+bool hugepage_vma_check(struct vm_area_struct *vma,
+			unsigned long vm_flags)
 {
 	if (!transhuge_vma_enabled(vma, vm_flags))
 		return false;
 
+	if (vm_flags & VM_NO_KHUGEPAGED)
+		return false;
+
+	/* Don't run khugepaged against DAX vma */
+	if (vma_is_dax(vma))
+		return false;
+
 	if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
 				vma->vm_pgoff, HPAGE_PMD_NR))
 		return false;
@@ -458,35 +463,31 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
 		return false;
 
 	/* Only regular file is valid */
-	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
-	    (vm_flags & VM_EXEC)) {
-		struct inode *inode = vma->vm_file->f_inode;
+	if (file_thp_enabled(vma))
+		return true;
 
-		return !inode_is_open_for_write(inode) &&
-			S_ISREG(inode->i_mode);
-	}
-
-	if (!vma->anon_vma || vma->vm_ops)
+	if (!vma->anon_vma || !vma_is_anonymous(vma))
 		return false;
 	if (vma_is_temporary_stack(vma))
 		return false;
-	return !(vm_flags & VM_NO_KHUGEPAGED);
+
+	return true;
 }
 
-int __khugepaged_enter(struct mm_struct *mm)
+void __khugepaged_enter(struct mm_struct *mm)
 {
 	struct mm_slot *mm_slot;
 	int wakeup;
 
 	mm_slot = alloc_mm_slot();
 	if (!mm_slot)
-		return -ENOMEM;
+		return;
 
 	/* __khugepaged_exit() must not run from under us */
 	VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
 	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
 		free_mm_slot(mm_slot);
-		return 0;
+		return;
 	}
 
 	spin_lock(&khugepaged_mm_lock);
@@ -502,28 +503,18 @@ int __khugepaged_enter(struct mm_struct *mm)
 	mmgrab(mm);
 	if (wakeup)
 		wake_up_interruptible(&khugepaged_wait);
-
-	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
-			       unsigned long vm_flags)
+void khugepaged_enter_vma(struct vm_area_struct *vma,
+			  unsigned long vm_flags)
 {
-	unsigned long hstart, hend;
-
-	/*
-	 * khugepaged only supports read-only files for non-shmem files.
-	 * khugepaged does not yet work on special mappings. And
-	 * file-private shmem THP is not supported.
-	 */
-	if (!hugepage_vma_check(vma, vm_flags))
-		return 0;
-
-	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
-	hend = vma->vm_end & HPAGE_PMD_MASK;
-	if (hstart < hend)
-		return khugepaged_enter(vma, vm_flags);
-	return 0;
+	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
+	    khugepaged_enabled() &&
+	    (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+	     (vma->vm_end & HPAGE_PMD_MASK))) {
+		if (hugepage_vma_check(vma, vm_flags))
+			__khugepaged_enter(vma->vm_mm);
+	}
 }
 
 void __khugepaged_exit(struct mm_struct *mm)
@@ -972,7 +963,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 	if (!hugepage_vma_check(vma, vma->vm_flags))
 		return SCAN_VMA_CHECK;
 	/* Anon VMA expected */
-	if (!vma->anon_vma || vma->vm_ops)
+	if (!vma->anon_vma || !vma_is_anonymous(vma))
 		return SCAN_VMA_CHECK;
 	return 0;
 }
@@ -1183,7 +1174,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
-	page_add_new_anon_rmap(new_page, vma, address, true);
+	page_add_new_anon_rmap(new_page, vma, address);
 	lru_cache_add_inactive_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
@@ -1456,6 +1447,10 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 	if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
 		return;
 
+	/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
+	if (userfaultfd_wp(vma))
+		return;
+
 	hpage = find_lock_page(vma->vm_file->f_mapping,
 			       linear_page_index(vma, haddr));
 	if (!hpage)
@@ -1591,7 +1586,15 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 		 * reverse order. Trylock is a way to avoid deadlock.
 		 */
 		if (mmap_write_trylock(mm)) {
-			if (!khugepaged_test_exit(mm))
+			/*
+			 * When a vma is registered with uffd-wp, we can't
+			 * recycle the pmd pgtable because there can be pte
+			 * markers installed.  Skip it only, so the rest mm/vma
+			 * can still have the same file mapped hugely, however
+			 * it'll always mapped in small page size for uffd-wp
+			 * registered ranges.
+			 */
+			if (!khugepaged_test_exit(mm) && !userfaultfd_wp(vma))
 				collapse_and_free_pmd(mm, vma, addr, pmd);
 			mmap_write_unlock(mm);
 		} else {
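
Usage note: khugepaged_enter_vma() now returns void and does all eligibility checks itself (khugepaged_enabled(), the MMF_VM_HUGEPAGE bit, a PMD-sized range test, and hugepage_vma_check()), so callers no longer propagate -ENOMEM the way they did with khugepaged_enter_vma_merge(). The sketch below is only an illustration of the new calling convention, assuming the matching declaration lands in include/linux/khugepaged.h (that header is not part of this diff, which is limited to mm/khugepaged.c); example_register_vma() is a hypothetical caller, not a function in the kernel tree.

#include <linux/mm.h>
#include <linux/khugepaged.h>

/*
 * Hypothetical caller sketch: registration is now fire-and-forget.
 * A failed mm_slot allocation inside __khugepaged_enter() is simply
 * tolerated; the mm can still be registered later, e.g. at fault time.
 */
static void example_register_vma(struct vm_area_struct *vma,
				 unsigned long vm_flags)
{
	/*
	 * Old interface:
	 *	if (!(vm_flags & VM_NO_KHUGEPAGED) &&
	 *	    khugepaged_enter_vma_merge(vma, vm_flags))
	 *		return -ENOMEM;
	 *
	 * New interface: the VM_NO_KHUGEPAGED check has moved into
	 * hugepage_vma_check(), which khugepaged_enter_vma() calls itself.
	 */
	khugepaged_enter_vma(vma, vm_flags);
}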