diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 72 |
1 files changed, 60 insertions, 12 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 57b7b0b5d9eb..b586cdd75930 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5096,6 +5096,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct * unmapped and its refcount is dropped, so just clear pte here. */ if (unlikely(!pte_present(pte))) { +#ifdef CONFIG_PTE_MARKER_UFFD_WP /* * If the pte was wr-protected by uffd-wp in any of the * swap forms, meanwhile the caller does not want to @@ -5107,6 +5108,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); else +#endif huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); continue; @@ -5135,11 +5137,13 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); +#ifdef CONFIG_PTE_MARKER_UFFD_WP /* Leave a uffd-wp pte marker if needed */ if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); +#endif hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); @@ -5531,6 +5535,23 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma, return handle_userfault(&vmf, reason); } +/* + * Recheck pte with pgtable lock. Returns true if pte didn't change, or + * false if pte changed or is changing. + */ +static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm, + pte_t *ptep, pte_t old_pte) +{ + spinlock_t *ptl; + bool same; + + ptl = huge_pte_lock(h, mm, ptep); + same = pte_same(huge_ptep_get(ptep), old_pte); + spin_unlock(ptl); + + return same; +} + static vm_fault_t hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct address_space *mapping, pgoff_t idx, @@ -5571,10 +5592,33 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (idx >= size) goto out; /* Check for page in userfault range */ - if (userfaultfd_missing(vma)) - return hugetlb_handle_userfault(vma, mapping, idx, - flags, haddr, address, - VM_UFFD_MISSING); + if (userfaultfd_missing(vma)) { + /* + * Since hugetlb_no_page() was examining pte + * without pgtable lock, we need to re-test under + * lock because the pte may not be stable and could + * have changed from under us. Try to detect + * either changed or during-changing ptes and retry + * properly when needed. + * + * Note that userfaultfd is actually fine with + * false positives (e.g. caused by pte changed), + * but not wrong logical events (e.g. caused by + * reading a pte during changing). The latter can + * confuse the userspace, so the strictness is very + * much preferred. E.g., MISSING event should + * never happen on the page after UFFDIO_COPY has + * correctly installed the page and returned. + */ + if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) { + ret = 0; + goto out; + } + + return hugetlb_handle_userfault(vma, mapping, idx, flags, + haddr, address, + VM_UFFD_MISSING); + } page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { @@ -5590,11 +5634,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * here. Before returning error, get ptl and make * sure there really is no pte entry. */ - ptl = huge_pte_lock(h, mm, ptep); - ret = 0; - if (huge_pte_none(huge_ptep_get(ptep))) + if (hugetlb_pte_stable(h, mm, ptep, old_pte)) ret = vmf_error(PTR_ERR(page)); - spin_unlock(ptl); + else + ret = 0; goto out; } clear_huge_page(page, address, pages_per_huge_page(h)); @@ -5640,9 +5683,14 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (userfaultfd_minor(vma)) { unlock_page(page); put_page(page); - return hugetlb_handle_userfault(vma, mapping, idx, - flags, haddr, address, - VM_UFFD_MINOR); + /* See comment in userfaultfd_missing() block above */ + if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) { + ret = 0; + goto out; + } + return hugetlb_handle_userfault(vma, mapping, idx, flags, + haddr, address, + VM_UFFD_MINOR); } } @@ -6804,7 +6852,7 @@ void hugetlb_vma_lock_release(struct kref *kref) kfree(vma_lock); } -void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) +static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) { struct vm_area_struct *vma = vma_lock->vma; |