diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 01:45:56 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 01:45:56 +0100 |
commit | 992de5a8eca7cbd3215e3eb2c439b2c11582a58b (patch) | |
tree | 863988f84c1dd57a02fa337ecbce49263a3b9511 /mm/memory.c | |
parent | Merge tag 'gfs2-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/gi... (diff) | |
parent | memcg: zap memcg_slab_caches and memcg_slab_mutex (diff) | |
download | linux-992de5a8eca7cbd3215e3eb2c439b2c11582a58b.tar.xz linux-992de5a8eca7cbd3215e3eb2c439b2c11582a58b.zip |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"Bite-sized chunks this time, to avoid the MTA ratelimiting woes.
- fs/notify updates
- ocfs2
- some of MM"
That laconic "some MM" is mainly the removal of remap_file_pages(),
which is a big simplification of the VM, and which gets rid of a *lot*
of random cruft and special cases because we no longer support the
non-linear mappings that it used.
From a user interface perspective, nothing has changed, because the
remap_file_pages() syscall still exists, it's just done by emulating the
old behavior by creating a lot of individual small mappings instead of
one non-linear one.
The emulation is slower than the old "native" non-linear mappings, but
nobody really uses or cares about remap_file_pages(), and simplifying
the VM is a big advantage.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (78 commits)
memcg: zap memcg_slab_caches and memcg_slab_mutex
memcg: zap memcg_name argument of memcg_create_kmem_cache
memcg: zap __memcg_{charge,uncharge}_slab
mm/page_alloc.c: place zone_id check before VM_BUG_ON_PAGE check
mm: hugetlb: fix type of hugetlb_treat_as_movable variable
mm, hugetlb: remove unnecessary lower bound on sysctl handlers"?
mm: memory: merge shared-writable dirtying branches in do_wp_page()
mm: memory: remove ->vm_file check on shared writable vmas
xtensa: drop _PAGE_FILE and pte_file()-related helpers
x86: drop _PAGE_FILE and pte_file()-related helpers
unicore32: drop pte_file()-related helpers
um: drop _PAGE_FILE and pte_file()-related helpers
tile: drop pte_file()-related helpers
sparc: drop pte_file()-related helpers
sh: drop _PAGE_FILE and pte_file()-related helpers
score: drop _PAGE_FILE and pte_file()-related helpers
s390: drop pte_file()-related helpers
parisc: drop _PAGE_FILE and pte_file()-related helpers
openrisc: drop _PAGE_FILE and pte_file()-related helpers
nios2: drop _PAGE_FILE and pte_file()-related helpers
...
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 276 |
1 files changed, 84 insertions, 192 deletions
diff --git a/mm/memory.c b/mm/memory.c index d707c4dfbbb4..d63849b5188f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -813,42 +813,40 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, /* pte contains position in swap or file, so copy. */ if (unlikely(!pte_present(pte))) { - if (!pte_file(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); - - if (likely(!non_swap_entry(entry))) { - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - - if (PageAnon(page)) - rss[MM_ANONPAGES]++; - else - rss[MM_FILEPAGES]++; - - if (is_write_migration_entry(entry) && - is_cow_mapping(vm_flags)) { - /* - * COW mappings require pages in both - * parent and child to be set to read. - */ - make_migration_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(*src_pte)) - pte = pte_swp_mksoft_dirty(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } + swp_entry_t entry = pte_to_swp_entry(pte); + + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } + rss[MM_SWAPENTS]++; + } else if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + + if (PageAnon(page)) + rss[MM_ANONPAGES]++; + else + rss[MM_FILEPAGES]++; + + if (is_write_migration_entry(entry) && + is_cow_mapping(vm_flags)) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + set_pte_at(src_mm, addr, src_pte, pte); } } goto out_set_pte; @@ -1022,11 +1020,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. */ - if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR | - VM_PFNMAP | VM_MIXEDMAP))) { - if (!vma->anon_vma) - return 0; - } + if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && + !vma->anon_vma) + return 0; if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); @@ -1084,6 +1080,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; + swp_entry_t entry; again: init_rss_vec(rss); @@ -1109,28 +1106,12 @@ again: if (details->check_mapping && details->check_mapping != page->mapping) continue; - /* - * Each page->index must be checked when - * invalidating or truncating nonlinear. - */ - if (details->nonlinear_vma && - (page->index < details->first_index || - page->index > details->last_index)) - continue; } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; - if (unlikely(details) && details->nonlinear_vma - && linear_page_index(details->nonlinear_vma, - addr) != page->index) { - pte_t ptfile = pgoff_to_pte(page->index); - if (pte_soft_dirty(ptent)) - ptfile = pte_file_mksoft_dirty(ptfile); - set_pte_at(mm, addr, pte, ptfile); - } if (PageAnon(page)) rss[MM_ANONPAGES]--; else { @@ -1153,33 +1134,25 @@ again: } continue; } - /* - * If details->check_mapping, we leave swap entries; - * if details->nonlinear_vma, we leave file entries. - */ + /* If details->check_mapping, we leave swap entries. */ if (unlikely(details)) continue; - if (pte_file(ptent)) { - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) - print_bad_pte(vma, addr, ptent, NULL); - } else { - swp_entry_t entry = pte_to_swp_entry(ptent); - if (!non_swap_entry(entry)) - rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { - struct page *page; + entry = pte_to_swp_entry(ptent); + if (!non_swap_entry(entry)) + rss[MM_SWAPENTS]--; + else if (is_migration_entry(entry)) { + struct page *page; - page = migration_entry_to_page(entry); + page = migration_entry_to_page(entry); - if (PageAnon(page)) - rss[MM_ANONPAGES]--; - else - rss[MM_FILEPAGES]--; - } - if (unlikely(!free_swap_and_cache(entry))) - print_bad_pte(vma, addr, ptent, NULL); + if (PageAnon(page)) + rss[MM_ANONPAGES]--; + else + rss[MM_FILEPAGES]--; } + if (unlikely(!free_swap_and_cache(entry))) + print_bad_pte(vma, addr, ptent, NULL); pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -1279,7 +1252,7 @@ static void unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping && !details->nonlinear_vma) + if (details && !details->check_mapping) details = NULL; BUG_ON(addr >= end); @@ -1373,7 +1346,7 @@ void unmap_vmas(struct mmu_gather *tlb, * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap - * @details: details of nonlinear truncation or shared cache invalidation + * @details: details of shared cache invalidation * * Caller must protect the VMA list */ @@ -1399,7 +1372,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap - * @details: details of nonlinear truncation or shared cache invalidation + * @details: details of shared cache invalidation * * The range must fit into one VMA. */ @@ -1924,12 +1897,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, EXPORT_SYMBOL_GPL(apply_to_page_range); /* - * handle_pte_fault chooses page fault handler according to an entry - * which was read non-atomically. Before making any commitment, on - * those architectures or configurations (e.g. i386 with PAE) which - * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault - * must check under lock before unmapping the pte and proceeding - * (but do_wp_page is only called after already making such a check; + * handle_pte_fault chooses page fault handler according to an entry which was + * read non-atomically. Before making any commitment, on those architectures + * or configurations (e.g. i386 with PAE) which might give a mix of unmatched + * parts, do_swap_page must check under lock before unmapping the pte and + * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). */ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, @@ -2035,7 +2007,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t entry; int ret = 0; int page_mkwrite = 0; - struct page *dirty_page = NULL; + bool dirty_shared = false; unsigned long mmun_start = 0; /* For mmu_notifiers */ unsigned long mmun_end = 0; /* For mmu_notifiers */ struct mem_cgroup *memcg; @@ -2086,6 +2058,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { + page_cache_get(old_page); /* * Only catch write-faults on shared writable pages, * read-only shared pages can get COWed by @@ -2093,7 +2066,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; - page_cache_get(old_page); + pte_unmap_unlock(page_table, ptl); tmp = do_page_mkwrite(vma, old_page, address); if (unlikely(!tmp || (tmp & @@ -2113,11 +2086,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(old_page); goto unlock; } - page_mkwrite = 1; } - dirty_page = old_page; - get_page(dirty_page); + + dirty_shared = true; reuse: /* @@ -2136,20 +2108,20 @@ reuse: pte_unmap_unlock(page_table, ptl); ret |= VM_FAULT_WRITE; - if (!dirty_page) - return ret; - - if (!page_mkwrite) { + if (dirty_shared) { struct address_space *mapping; int dirtied; - lock_page(dirty_page); - dirtied = set_page_dirty(dirty_page); - VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page); - mapping = dirty_page->mapping; - unlock_page(dirty_page); + if (!page_mkwrite) + lock_page(old_page); - if (dirtied && mapping) { + dirtied = set_page_dirty(old_page); + VM_BUG_ON_PAGE(PageAnon(old_page), old_page); + mapping = old_page->mapping; + unlock_page(old_page); + page_cache_release(old_page); + + if ((dirtied || page_mkwrite) && mapping) { /* * Some device drivers do not set page.mapping * but still dirty their pages @@ -2157,25 +2129,9 @@ reuse: balance_dirty_pages_ratelimited(mapping); } - /* file_update_time outside page_lock */ - if (vma->vm_file) + if (!page_mkwrite) file_update_time(vma->vm_file); } - put_page(dirty_page); - if (page_mkwrite) { - struct address_space *mapping = dirty_page->mapping; - - set_page_dirty(dirty_page); - unlock_page(dirty_page); - page_cache_release(dirty_page); - if (mapping) { - /* - * Some device drivers do not set page.mapping - * but still dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - } return ret; } @@ -2333,25 +2289,11 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, } } -static inline void unmap_mapping_range_list(struct list_head *head, - struct zap_details *details) -{ - struct vm_area_struct *vma; - - /* - * In nonlinear VMAs there is no correspondence between virtual address - * offset and file offset. So we must perform an exhaustive search - * across *all* the pages in each nonlinear VMA, not just the pages - * whose virtual address lies outside the file truncation point. - */ - list_for_each_entry(vma, head, shared.nonlinear) { - details->nonlinear_vma = vma; - unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); - } -} - /** - * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file. + * unmap_mapping_range - unmap the portion of all mmaps in the specified + * address_space corresponding to the specified page range in the underlying + * file. + * * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE @@ -2380,7 +2322,6 @@ void unmap_mapping_range(struct address_space *mapping, } details.check_mapping = even_cows? NULL: mapping; - details.nonlinear_vma = NULL; details.first_index = hba; details.last_index = hba + hlen - 1; if (details.last_index < details.first_index) @@ -2390,8 +2331,6 @@ void unmap_mapping_range(struct address_space *mapping, i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); - if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) - unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); i_mmap_unlock_write(mapping); } EXPORT_SYMBOL(unmap_mapping_range); @@ -2752,8 +2691,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) - entry = pte_mksoft_dirty(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); @@ -2888,8 +2825,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && - fault_around_bytes >> PAGE_SHIFT > 1) { + if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) @@ -3021,8 +2957,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, balance_dirty_pages_ratelimited(mapping); } - /* file_update_time outside page_lock */ - if (vma->vm_file && !vma->vm_ops->page_mkwrite) + if (!vma->vm_ops->page_mkwrite) file_update_time(vma->vm_file); return ret; @@ -3034,7 +2969,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, +static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) { @@ -3051,46 +2986,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } -/* - * Fault of a previously existing named mapping. Repopulate the pte - * from the encoded file_pte if possible. This enables swappable - * nonlinear vmas. - * - * We enter with non-exclusive mmap_sem (to exclude vma changes, - * but allow concurrent faults), and pte mapped but not yet locked. - * We return with pte unmapped and unlocked. - * The mmap_sem may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). - */ -static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - unsigned int flags, pte_t orig_pte) -{ - pgoff_t pgoff; - - flags |= FAULT_FLAG_NONLINEAR; - - if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) - return 0; - - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { - /* - * Page table corrupted: show pte and kill process. - */ - print_bad_pte(vma, address, orig_pte, NULL); - return VM_FAULT_SIGBUS; - } - - pgoff = pte_to_pgoff(orig_pte); - if (!(flags & FAULT_FLAG_WRITE)) - return do_read_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); - if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); - return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); -} - static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) @@ -3218,15 +3113,12 @@ static int handle_pte_fault(struct mm_struct *mm, if (pte_none(entry)) { if (vma->vm_ops) { if (likely(vma->vm_ops->fault)) - return do_linear_fault(mm, vma, address, - pte, pmd, flags, entry); + return do_fault(mm, vma, address, pte, + pmd, flags, entry); } return do_anonymous_page(mm, vma, address, pte, pmd, flags); } - if (pte_file(entry)) - return do_nonlinear_fault(mm, vma, address, - pte, pmd, flags, entry); return do_swap_page(mm, vma, address, pte, pmd, flags, entry); } |