summaryrefslogtreecommitdiffstats
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c255
1 files changed, 51 insertions, 204 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 98aac6201e1b..f212d571b563 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -545,42 +545,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
return ERR_PTR(-EINVAL);
-
- /*
- * Considering PTE level hugetlb, like continuous-PTE hugetlb on
- * ARM64 architecture.
- */
- if (is_vm_hugetlb_page(vma)) {
- page = follow_huge_pmd_pte(vma, address, flags);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
-
-retry:
if (unlikely(pmd_bad(*pmd)))
return no_page_table(vma, flags);
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
pte = *ptep;
- if (!pte_present(pte)) {
- swp_entry_t entry;
- /*
- * KSM's break_ksm() relies upon recognizing a ksm page
- * even while it is being migrated, so for that case we
- * need migration_entry_wait().
- */
- if (likely(!(flags & FOLL_MIGRATION)))
- goto no_page;
- if (pte_none(pte))
- goto no_page;
- entry = pte_to_swp_entry(pte);
- if (!is_migration_entry(entry))
- goto no_page;
- pte_unmap_unlock(ptep, ptl);
- migration_entry_wait(mm, pmd, address);
- goto retry;
- }
+ if (!pte_present(pte))
+ goto no_page;
if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
goto no_page;
@@ -623,7 +594,7 @@ retry:
}
}
- if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+ if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) {
page = ERR_PTR(-EMLINK);
goto out;
}
@@ -690,42 +661,8 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
pmdval = READ_ONCE(*pmd);
if (pmd_none(pmdval))
return no_page_table(vma, flags);
- if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
- page = follow_huge_pmd_pte(vma, address, flags);
- if (page)
- return page;
+ if (!pmd_present(pmdval))
return no_page_table(vma, flags);
- }
- if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pmd_val(pmdval)), flags,
- PMD_SHIFT);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
-retry:
- if (!pmd_present(pmdval)) {
- /*
- * Should never reach here, if thp migration is not supported;
- * Otherwise, it must be a thp migration entry.
- */
- VM_BUG_ON(!thp_migration_supported() ||
- !is_pmd_migration_entry(pmdval));
-
- if (likely(!(flags & FOLL_MIGRATION)))
- return no_page_table(vma, flags);
-
- pmd_migration_entry_wait(mm, pmd);
- pmdval = READ_ONCE(*pmd);
- /*
- * MADV_DONTNEED may convert the pmd to null because
- * mmap_lock is held in read mode
- */
- if (pmd_none(pmdval))
- return no_page_table(vma, flags);
- goto retry;
- }
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
@@ -739,18 +676,10 @@ retry:
if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
return no_page_table(vma, flags);
-retry_locked:
ptl = pmd_lock(mm, pmd);
- if (unlikely(pmd_none(*pmd))) {
- spin_unlock(ptl);
- return no_page_table(vma, flags);
- }
if (unlikely(!pmd_present(*pmd))) {
spin_unlock(ptl);
- if (likely(!(flags & FOLL_MIGRATION)))
- return no_page_table(vma, flags);
- pmd_migration_entry_wait(mm, pmd);
- goto retry_locked;
+ return no_page_table(vma, flags);
}
if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl);
@@ -793,20 +722,6 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
pud = pud_offset(p4dp, address);
if (pud_none(*pud))
return no_page_table(vma, flags);
- if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
- page = follow_huge_pud(mm, address, pud, flags);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
- if (is_hugepd(__hugepd(pud_val(*pud)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pud_val(*pud)), flags,
- PUD_SHIFT);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
if (pud_devmap(*pud)) {
ptl = pud_lock(mm, pud);
page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
@@ -826,7 +741,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
struct follow_page_context *ctx)
{
p4d_t *p4d;
- struct page *page;
p4d = p4d_offset(pgdp, address);
if (p4d_none(*p4d))
@@ -835,14 +749,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
if (unlikely(p4d_bad(*p4d)))
return no_page_table(vma, flags);
- if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(p4d_val(*p4d)), flags,
- P4D_SHIFT);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
return follow_pud_mask(vma, address, p4d, flags, ctx);
}
@@ -880,10 +786,18 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
ctx->page_mask = 0;
- /* make this handle hugepd */
- page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
- if (!IS_ERR(page)) {
- WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
+ /*
+ * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
+ * special hugetlb page table walking code. This eliminates the
+ * need to check for hugetlb entries in the general walking code.
+ *
+ * hugetlb_follow_page_mask is only for follow_page() handling here.
+ * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
+ */
+ if (is_vm_hugetlb_page(vma)) {
+ page = hugetlb_follow_page_mask(vma, address, flags);
+ if (!page)
+ page = no_page_table(vma, flags);
return page;
}
@@ -892,21 +806,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
return no_page_table(vma, flags);
- if (pgd_huge(*pgd)) {
- page = follow_huge_pgd(mm, address, pgd, flags);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
- if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pgd_val(*pgd)), flags,
- PGDIR_SHIFT);
- if (page)
- return page;
- return no_page_table(vma, flags);
- }
-
return follow_p4d_mask(vma, address, pgd, flags, ctx);
}
@@ -1077,6 +976,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if (!(vm_flags & VM_WRITE)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
+ /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
+ if (is_vm_hugetlb_page(vma))
+ return -EFAULT;
/*
* We used to let the write,force case do COW in a
* VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
@@ -2117,14 +2019,19 @@ static long __gup_longterm_locked(struct mm_struct *mm,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
+ int *locked,
unsigned int gup_flags)
{
+ bool must_unlock = false;
unsigned int flags;
long rc, nr_pinned_pages;
+ if (locked && WARN_ON_ONCE(!*locked))
+ return -EINVAL;
+
if (!(gup_flags & FOLL_LONGTERM))
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
- NULL, gup_flags);
+ locked, gup_flags);
/*
* If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
@@ -2138,8 +2045,13 @@ static long __gup_longterm_locked(struct mm_struct *mm,
return -EINVAL;
flags = memalloc_pin_save();
do {
+ if (locked && !*locked) {
+ mmap_read_lock(mm);
+ must_unlock = true;
+ *locked = 1;
+ }
nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
- pages, vmas, NULL,
+ pages, vmas, locked,
gup_flags);
if (nr_pinned_pages <= 0) {
rc = nr_pinned_pages;
@@ -2149,6 +2061,10 @@ static long __gup_longterm_locked(struct mm_struct *mm,
} while (rc == -EAGAIN);
memalloc_pin_restore(flags);
+ if (locked && *locked && must_unlock) {
+ mmap_read_unlock(mm);
+ *locked = 0;
+ }
return rc ? rc : nr_pinned_pages;
}
@@ -2172,35 +2088,6 @@ static bool is_valid_gup_flags(unsigned int gup_flags)
}
#ifdef CONFIG_MMU
-static long __get_user_pages_remote(struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked)
-{
- /*
- * Parts of FOLL_LONGTERM behavior are incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. However, this only comes up if locked is set, and there are
- * callers that do request FOLL_LONGTERM, but do not set locked. So,
- * allow what we can.
- */
- if (gup_flags & FOLL_LONGTERM) {
- if (WARN_ON_ONCE(locked))
- return -EINVAL;
- /*
- * This will check the vmas (even if our vmas arg is NULL)
- * and return -ENOTSUPP if DAX isn't allowed in this case:
- */
- return __gup_longterm_locked(mm, start, nr_pages, pages,
- vmas, gup_flags | FOLL_TOUCH |
- FOLL_REMOTE);
- }
-
- return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
- locked,
- gup_flags | FOLL_TOUCH | FOLL_REMOTE);
-}
-
/**
* get_user_pages_remote() - pin user pages in memory
* @mm: mm_struct of target mm
@@ -2269,8 +2156,8 @@ long get_user_pages_remote(struct mm_struct *mm,
if (!is_valid_gup_flags(gup_flags))
return -EINVAL;
- return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
- pages, vmas, locked);
+ return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+ gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
@@ -2282,14 +2169,6 @@ long get_user_pages_remote(struct mm_struct *mm,
{
return 0;
}
-
-static long __get_user_pages_remote(struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked)
-{
- return 0;
-}
#endif /* !CONFIG_MMU */
/**
@@ -2316,7 +2195,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
return -EINVAL;
return __gup_longterm_locked(current->mm, start, nr_pages,
- pages, vmas, gup_flags | FOLL_TOUCH);
+ pages, vmas, NULL, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
@@ -2342,18 +2221,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
int locked = 1;
long ret;
- /*
- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. As there are no users of this flag in this call we simply
- * disallow this option for now.
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
- return -EINVAL;
-
mmap_read_lock(mm);
- ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
- &locked, gup_flags | FOLL_TOUCH);
+ ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked,
+ gup_flags | FOLL_TOUCH);
if (locked)
mmap_read_unlock(mm);
return ret;
@@ -2480,7 +2350,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
goto pte_unmap;
}
- if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+ if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) {
gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
@@ -2672,7 +2542,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
return 0;
}
- if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) {
+ if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2738,7 +2608,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 0;
}
- if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) {
+ if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2778,7 +2648,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}
- if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) {
+ if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2953,29 +2823,6 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
}
#endif
-static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
- unsigned int gup_flags, struct page **pages)
-{
- int ret;
-
- /*
- * FIXME: FOLL_LONGTERM does not work with
- * get_user_pages_unlocked() (see comments in that function)
- */
- if (gup_flags & FOLL_LONGTERM) {
- mmap_read_lock(current->mm);
- ret = __gup_longterm_locked(current->mm,
- start, nr_pages,
- pages, NULL, gup_flags);
- mmap_read_unlock(current->mm);
- } else {
- ret = get_user_pages_unlocked(start, nr_pages,
- pages, gup_flags);
- }
-
- return ret;
-}
-
static unsigned long lockless_pages_from_mm(unsigned long start,
unsigned long end,
unsigned int gup_flags,
@@ -3060,8 +2907,8 @@ static int internal_get_user_pages_fast(unsigned long start,
/* Slow path: try to get the remaining pages with get_user_pages */
start += nr_pinned << PAGE_SHIFT;
pages += nr_pinned;
- ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
- pages);
+ ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages,
+ gup_flags);
if (ret < 0) {
/*
* The caller has to unpin the pages we already pinned so
@@ -3260,9 +3107,9 @@ long pin_user_pages_remote(struct mm_struct *mm,
if (WARN_ON_ONCE(!pages))
return -EINVAL;
- gup_flags |= FOLL_PIN;
- return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
- pages, vmas, locked);
+ return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+ gup_flags | FOLL_PIN | FOLL_TOUCH |
+ FOLL_REMOTE);
}
EXPORT_SYMBOL(pin_user_pages_remote);
@@ -3296,7 +3143,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
gup_flags |= FOLL_PIN;
return __gup_longterm_locked(current->mm, start, nr_pages,
- pages, vmas, gup_flags);
+ pages, vmas, NULL, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages);