diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 05:49:49 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 05:49:49 +0200 |
commit | d34fc1adf01ff87026da85fb972dc259dc347540 (patch) | |
tree | 27356073d423187157b7cdb69da32b53102fb9e7 /mm/mmap.c | |
parent | x86/mm: Document how CR4.PCIDE restore works (diff) | |
parent | mm,fork: introduce MADV_WIPEONFORK (diff) | |
download | linux-d34fc1adf01ff87026da85fb972dc259dc347540.tar.xz linux-d34fc1adf01ff87026da85fb972dc259dc347540.zip |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- various misc bits
- DAX updates
- OCFS2
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (119 commits)
mm,fork: introduce MADV_WIPEONFORK
x86,mpx: make mpx depend on x86-64 to free up VMA flag
mm: add /proc/pid/smaps_rollup
mm: hugetlb: clear target sub-page last when clearing huge page
mm: oom: let oom_reap_task and exit_mmap run concurrently
swap: choose swap device according to numa node
mm: replace TIF_MEMDIE checks by tsk_is_oom_victim
mm, oom: do not rely on TIF_MEMDIE for memory reserves access
z3fold: use per-cpu unbuddied lists
mm, swap: don't use VMA based swap readahead if HDD is used as swap
mm, swap: add sysfs interface for VMA based swap readahead
mm, swap: VMA based swap readahead
mm, swap: fix swap readahead marking
mm, swap: add swap readahead hit statistics
mm/vmalloc.c: don't reinvent the wheel but use existing llist API
mm/vmstat.c: fix wrong comment
selftests/memfd: add memfd_create hugetlbfs selftest
mm/shmem: add hugetlbfs support to memfd_create()
mm, devm_memremap_pages: use multi-order radix for ZONE_DEVICE lookups
mm/vmalloc.c: halve the number of comparisons performed in pcpu_get_vm_areas()
...
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 46 |
1 files changed, 36 insertions, 10 deletions
diff --git a/mm/mmap.c b/mm/mmap.c index f19efcf75418..4c5981651407 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -44,6 +44,7 @@ #include <linux/userfaultfd_k.h> #include <linux/moduleparam.h> #include <linux/pkeys.h> +#include <linux/oom.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -2639,13 +2640,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, if (vma->vm_start >= end) return 0; - if (uf) { - int error = userfaultfd_unmap_prep(vma, start, end, uf); - - if (error) - return error; - } - /* * If we need to split any vma, do it now to save pain later. * @@ -2679,6 +2673,21 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, } vma = prev ? prev->vm_next : mm->mmap; + if (unlikely(uf)) { + /* + * If userfaultfd_unmap_prep returns an error the vmas + * will remain splitted, but userland will get a + * highly unexpected error anyway. This is no + * different than the case where the first of the two + * __split_vma fails, but we don't undo the first + * split, despite we could. This is unlikely enough + * failure that it's not worth optimizing it for. + */ + int error = userfaultfd_unmap_prep(vma, start, end, uf); + if (error) + return error; + } + /* * unlock any mlock()ed ranges before detaching vmas */ @@ -2993,6 +3002,23 @@ void exit_mmap(struct mm_struct *mm) /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); + set_bit(MMF_OOM_SKIP, &mm->flags); + if (unlikely(tsk_is_oom_victim(current))) { + /* + * Wait for oom_reap_task() to stop working on this + * mm. Because MMF_OOM_SKIP is already set before + * calling down_read(), oom_reap_task() will not run + * on this "mm" post up_write(). + * + * tsk_is_oom_victim() cannot be set from under us + * either because current->mm is already set to NULL + * under task_lock before calling mmput and oom_mm is + * set not NULL by the OOM killer only if current->mm + * is found not NULL while holding the task_lock. + */ + down_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); + } free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); @@ -3514,7 +3540,7 @@ static int init_user_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; @@ -3535,7 +3561,7 @@ static int init_admin_reserve(void) { unsigned long free_kbytes; - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; @@ -3579,7 +3605,7 @@ static int reserve_mem_notifier(struct notifier_block *nb, break; case MEM_OFFLINE: - free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); + free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); if (sysctl_user_reserve_kbytes > free_kbytes) { init_user_reserve(); |