diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-01-14 00:46:43 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-14 02:32:40 +0100 |
commit | 8ac1f8320a0073f28cf9e0491af4cd98f504f92a (patch) | |
tree | 4dad891c302587fdc7b099b18e05d7dbc5526c64 /mm | |
parent | thp: bail out gup_fast on splitting pmd (diff) | |
download | linux-8ac1f8320a0073f28cf9e0491af4cd98f504f92a.tar.xz linux-8ac1f8320a0073f28cf9e0491af4cd98f504f92a.zip |
thp: pte alloc trans splitting
pte alloc routines must wait for split_huge_page if the pmd is not present
and not null (i.e. pmd_trans_splitting). The additional branches are
optimized away at compile time by pmd_trans_splitting if the config option
is off. However we must pass the vma down in order to know the anon_vma
lock to wait for.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 19 | ||||
-rw-r--r-- | mm/mremap.c | 8 |
2 files changed, 18 insertions, 9 deletions
diff --git a/mm/memory.c b/mm/memory.c index bdf19366b705..567bca80ea53 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address) { pgtable_t new = pte_alloc_one(mm, address); + int wait_split_huge_page; if (!new) return -ENOMEM; @@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ spin_lock(&mm->page_table_lock); - if (!pmd_present(*pmd)) { /* Has another populated it ? */ + wait_split_huge_page = 0; + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ mm->nr_ptes++; pmd_populate(mm, pmd, new); new = NULL; - } + } else if (unlikely(pmd_trans_splitting(*pmd))) + wait_split_huge_page = 1; spin_unlock(&mm->page_table_lock); if (new) pte_free(mm, new); + if (wait_split_huge_page) + wait_split_huge_page(vma->anon_vma, pmd); return 0; } @@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&init_mm.page_table_lock); - if (!pmd_present(*pmd)) { /* Has another populated it ? */ + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; - } + } else + VM_BUG_ON(pmd_trans_splitting(*pmd)); spin_unlock(&init_mm.page_table_lock); if (new) pte_free_kernel(&init_mm, new); @@ -3253,7 +3260,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd = pmd_alloc(mm, pud, address); if (!pmd) return VM_FAULT_OOM; - pte = pte_alloc_map(mm, pmd, address); + pte = pte_alloc_map(mm, vma, pmd, address); if (!pte) return VM_FAULT_OOM; diff --git a/mm/mremap.c b/mm/mremap.c index 563fbdd6293a..b09eefaea0b8 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) return pmd; } -static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) +static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr) { pgd_t *pgd; pud_t *pud; @@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) if (!pmd) return NULL; - if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr)) + VM_BUG_ON(pmd_trans_huge(*pmd)); + if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr)) return NULL; return pmd; @@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, old_pmd = get_old_pmd(vma->vm_mm, old_addr); if (!old_pmd) continue; - new_pmd = alloc_new_pmd(vma->vm_mm, new_addr); + new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); if (!new_pmd) break; next = (new_addr + PMD_SIZE) & PMD_MASK; |