Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  109
1 file changed, 85 insertions, 24 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 93897f23cc11..a1b93d9e4449 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -789,6 +789,46 @@ out:
return pfn_to_page(pfn);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t pmd)
+{
+ unsigned long pfn = pmd_pfn(pmd);
+
+ /*
+ * There is no pmd_special() but there may be special pmds, e.g.
+ * in a direct-access (dax) mapping, so let's just replicate the
+ * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+ */
+ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+ if (vma->vm_flags & VM_MIXEDMAP) {
+ if (!pfn_valid(pfn))
+ return NULL;
+ goto out;
+ } else {
+ unsigned long off;
+ off = (addr - vma->vm_start) >> PAGE_SHIFT;
+ if (pfn == vma->vm_pgoff + off)
+ return NULL;
+ if (!is_cow_mapping(vma->vm_flags))
+ return NULL;
+ }
+ }
+
+ if (is_zero_pfn(pfn))
+ return NULL;
+ if (unlikely(pfn > highest_memmap_pfn))
+ return NULL;
+
+ /*
+ * NOTE! We still have PageReserved() pages in the page tables.
+ * eg. VDSO mappings can cause them to exist.
+ */
+out:
+ return pfn_to_page(pfn);
+}
+#endif
+
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
@@ -1182,15 +1222,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) {
-#ifdef CONFIG_DEBUG_VM
- if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
- pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
- __func__, addr, end,
- vma->vm_start,
- vma->vm_end);
- BUG();
- }
-#endif
+ VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
+ !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
split_huge_pmd(vma, pmd, addr);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
@@ -1711,6 +1744,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long next;
unsigned long end = addr + PAGE_ALIGN(size);
struct mm_struct *mm = vma->vm_mm;
+ unsigned long remap_pfn = pfn;
int err;
/*
@@ -1737,7 +1771,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
vma->vm_pgoff = pfn;
}
- err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+ err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
if (err)
return -EINVAL;
@@ -1756,7 +1790,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
} while (pgd++, addr = next, addr != end);
if (err)
- untrack_pfn(vma, pfn, PAGE_ALIGN(size));
+ untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
return err;
}
@@ -2340,6 +2374,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* not dirty accountable.
*/
if (PageAnon(old_page) && !PageKsm(old_page)) {
+ int total_mapcount;
if (!trylock_page(old_page)) {
get_page(old_page);
pte_unmap_unlock(page_table, ptl);
@@ -2354,13 +2389,18 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
put_page(old_page);
}
- if (reuse_swap_page(old_page)) {
- /*
- * The page is all ours. Move it to our anon_vma so
- * the rmap code will not search our parent or siblings.
- * Protected against the rmap code by the page lock.
- */
- page_move_anon_rmap(old_page, vma, address);
+ if (reuse_swap_page(old_page, &total_mapcount)) {
+ if (total_mapcount == 1) {
+ /*
+ * The page is all ours. Move it to
+ * our anon_vma so the rmap code will
+ * not search our parent or siblings.
+ * Protected against the rmap code by
+ * the page lock.
+ */
+ page_move_anon_rmap(compound_head(old_page),
+ vma, address);
+ }
unlock_page(old_page);
return wp_page_reuse(mm, vma, address, page_table, ptl,
orig_pte, old_page, 0, 0);
@@ -2584,7 +2624,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
dec_mm_counter_fast(mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
- if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+ if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
@@ -2836,7 +2876,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
* vm_ops->map_pages.
*/
void do_set_pte(struct vm_area_struct *vma, unsigned long address,
- struct page *page, pte_t *pte, bool write, bool anon)
+ struct page *page, pte_t *pte, bool write, bool anon, bool old)
{
pte_t entry;
@@ -2844,6 +2884,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = mk_pte(page, vma->vm_page_prot);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (old)
+ entry = pte_mkold(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address, false);
@@ -2857,8 +2899,16 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
+/*
+ * If the architecture emulates the "accessed" or "young" bit without HW
+ * support, there is not much gain from fault_around.
+ */
static unsigned long fault_around_bytes __read_mostly =
+#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ PAGE_SIZE;
+#else
rounddown_pow_of_two(65536);
+#endif
#ifdef CONFIG_DEBUG_FS
static int fault_around_bytes_get(void *data, u64 *val)
@@ -2981,9 +3031,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
- do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
goto unlock_out;
+ do_fault_around(vma, address, pte, pgoff, flags);
+ /* Check if the fault was handled by faultaround */
+ if (!pte_same(*pte, orig_pte)) {
+ /*
+ * Faultaround produces old ptes, but the pte we've
+ * handled the fault for should be young.
+ */
+ pte_t entry = pte_mkyoung(*pte);
+ if (ptep_set_access_flags(vma, address, pte, entry, 0))
+ update_mmu_cache(vma, address, pte);
+ goto unlock_out;
+ }
pte_unmap_unlock(pte, ptl);
}
@@ -2998,7 +3059,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, false, false);
+ do_set_pte(vma, address, fault_page, pte, false, false, false);
unlock_page(fault_page);
unlock_out:
pte_unmap_unlock(pte, ptl);
@@ -3050,7 +3111,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
goto uncharge_out;
}
- do_set_pte(vma, address, new_page, pte, true, true);
+ do_set_pte(vma, address, new_page, pte, true, true, false);
mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
@@ -3107,7 +3168,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, true, false);
+ do_set_pte(vma, address, fault_page, pte, true, false, false);
pte_unmap_unlock(pte, ptl);
if (set_page_dirty(fault_page))