summaryrefslogtreecommitdiffstats
path: root/mm/khugepaged.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r--mm/khugepaged.c81
1 files changed, 42 insertions, 39 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a4e5eaf3eb01..16be62d493cd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -365,9 +365,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
* register it here without waiting a page fault that
* may not happen any time soon.
*/
- if (!(*vm_flags & VM_NO_KHUGEPAGED) &&
- khugepaged_enter_vma_merge(vma, *vm_flags))
- return -ENOMEM;
+ khugepaged_enter_vma(vma, *vm_flags);
break;
case MADV_NOHUGEPAGE:
*vm_flags &= ~VM_HUGEPAGE;
@@ -439,12 +437,19 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
return atomic_read(&mm->mm_users) == 0;
}
-static bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags)
+bool hugepage_vma_check(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
if (!transhuge_vma_enabled(vma, vm_flags))
return false;
+ if (vm_flags & VM_NO_KHUGEPAGED)
+ return false;
+
+ /* Don't run khugepaged against DAX vma */
+ if (vma_is_dax(vma))
+ return false;
+
if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
vma->vm_pgoff, HPAGE_PMD_NR))
return false;
@@ -458,35 +463,31 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
return false;
/* Only regular file is valid */
- if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
- (vm_flags & VM_EXEC)) {
- struct inode *inode = vma->vm_file->f_inode;
+ if (file_thp_enabled(vma))
+ return true;
- return !inode_is_open_for_write(inode) &&
- S_ISREG(inode->i_mode);
- }
-
- if (!vma->anon_vma || vma->vm_ops)
+ if (!vma->anon_vma || !vma_is_anonymous(vma))
return false;
if (vma_is_temporary_stack(vma))
return false;
- return !(vm_flags & VM_NO_KHUGEPAGED);
+
+ return true;
}
-int __khugepaged_enter(struct mm_struct *mm)
+void __khugepaged_enter(struct mm_struct *mm)
{
struct mm_slot *mm_slot;
int wakeup;
mm_slot = alloc_mm_slot();
if (!mm_slot)
- return -ENOMEM;
+ return;
/* __khugepaged_exit() must not run from under us */
VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
- return 0;
+ return;
}
spin_lock(&khugepaged_mm_lock);
@@ -502,28 +503,18 @@ int __khugepaged_enter(struct mm_struct *mm)
mmgrab(mm);
if (wakeup)
wake_up_interruptible(&khugepaged_wait);
-
- return 0;
}
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
- unsigned long vm_flags)
+void khugepaged_enter_vma(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
- unsigned long hstart, hend;
-
- /*
- * khugepaged only supports read-only files for non-shmem files.
- * khugepaged does not yet work on special mappings. And
- * file-private shmem THP is not supported.
- */
- if (!hugepage_vma_check(vma, vm_flags))
- return 0;
-
- hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
- hend = vma->vm_end & HPAGE_PMD_MASK;
- if (hstart < hend)
- return khugepaged_enter(vma, vm_flags);
- return 0;
+ if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
+ khugepaged_enabled() &&
+ (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
+ (vma->vm_end & HPAGE_PMD_MASK))) {
+ if (hugepage_vma_check(vma, vm_flags))
+ __khugepaged_enter(vma->vm_mm);
+ }
}
void __khugepaged_exit(struct mm_struct *mm)
@@ -972,7 +963,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
if (!hugepage_vma_check(vma, vma->vm_flags))
return SCAN_VMA_CHECK;
/* Anon VMA expected */
- if (!vma->anon_vma || vma->vm_ops)
+ if (!vma->anon_vma || !vma_is_anonymous(vma))
return SCAN_VMA_CHECK;
return 0;
}
@@ -1183,7 +1174,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
- page_add_new_anon_rmap(new_page, vma, address, true);
+ page_add_new_anon_rmap(new_page, vma, address);
lru_cache_add_inactive_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
@@ -1456,6 +1447,10 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
return;
+ /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
+ if (userfaultfd_wp(vma))
+ return;
+
hpage = find_lock_page(vma->vm_file->f_mapping,
linear_page_index(vma, haddr));
if (!hpage)
@@ -1591,7 +1586,15 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* reverse order. Trylock is a way to avoid deadlock.
*/
if (mmap_write_trylock(mm)) {
- if (!khugepaged_test_exit(mm))
+ /*
+ * When a vma is registered with uffd-wp, we can't
+ * recycle the pmd pgtable because there can be pte
+ * markers installed. Skip it only, so the rest mm/vma
+ * can still have the same file mapped hugely, however
+ * it'll always mapped in small page size for uffd-wp
+ * registered ranges.
+ */
+ if (!khugepaged_test_exit(mm) && !userfaultfd_wp(vma))
collapse_and_free_pmd(mm, vma, addr, pmd);
mmap_write_unlock(mm);
} else {