diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 9 | ||||
-rw-r--r-- | mm/internal.h | 5 | ||||
-rw-r--r-- | mm/kasan/report.c | 1 | ||||
-rw-r--r-- | mm/madvise.c | 1 | ||||
-rw-r--r-- | mm/memory.c | 1 | ||||
-rw-r--r-- | mm/mprotect.c | 1 | ||||
-rw-r--r-- | mm/mremap.c | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/page_io.c | 7 | ||||
-rw-r--r-- | mm/rmap.c | 36 | ||||
-rw-r--r-- | mm/zsmalloc.c | 1 |
11 files changed, 62 insertions, 10 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bc48ee783dd9..a1a0ac0ad6f6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4078,6 +4078,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr = *position; unsigned long remainder = *nr_pages; struct hstate *h = hstate_vma(vma); + int err = -EFAULT; while (vaddr < vma->vm_end && remainder) { pte_t *pte; @@ -4154,11 +4155,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { - int err = vm_fault_to_errno(ret, flags); - - if (err) - return err; - + err = vm_fault_to_errno(ret, flags); remainder = 0; break; } @@ -4213,7 +4210,7 @@ same_page: */ *position = vaddr; - return i ? i : -EFAULT; + return i ? i : err; } #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE diff --git a/mm/internal.h b/mm/internal.h index 24d88f084705..4ef49fc55e58 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 04bb1d3eb9ec..6bcfb01ba038 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -401,6 +401,7 @@ void kasan_report(unsigned long addr, size_t size, disable_trace_on_warning(); info.access_addr = (void *)addr; + info.first_bad_addr = (void *)addr; info.access_size = size; info.is_write = is_write; info.ip = ip; diff --git a/mm/madvise.c b/mm/madvise.c index 9976852f1e1c..47d8d8a25eae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, tlb_remove_check_page_size_change(tlb, PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; diff --git a/mm/memory.c b/mm/memory.c index 0e517be91a89..f65beaad319b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1197,6 +1197,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/mprotect.c b/mm/mprotect.c index 1a8c9ca83e48..4180ad8cc9c5 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, atomic_read(&vma->vm_mm->mm_users) == 1) target_node = numa_node_id(); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; diff --git a/mm/mremap.c b/mm/mremap.c index cd8a1b199ef9..3f23715d3c69 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, @@ -428,6 +429,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, struct vm_userfaultfd_ctx *uf, + struct list_head *uf_unmap_early, struct list_head *uf_unmap) { struct mm_struct *mm = current->mm; @@ -446,7 +448,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (addr + old_len > new_addr && new_addr + new_len > addr) goto out; - ret = do_munmap(mm, new_addr, new_len, NULL); + ret = do_munmap(mm, new_addr, new_len, uf_unmap_early); if (ret) goto out; @@ -514,6 +516,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long charged = 0; bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; + LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) @@ -541,7 +544,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf, &uf_unmap); + &locked, &uf, &uf_unmap_early, &uf_unmap); goto out; } @@ -621,6 +624,7 @@ out: up_write(¤t->mm->mmap_sem); if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); + userfaultfd_unmap_complete(mm, &uf_unmap_early); mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); userfaultfd_unmap_complete(mm, &uf_unmap); return ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6d30e914afb6..fc32aa81f359 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4891,9 +4891,11 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write, NUMA_ZONELIST_ORDER_LEN); user_zonelist_order = oldval; } else if (oldval != user_zonelist_order) { + mem_hotplug_begin(); mutex_lock(&zonelists_mutex); build_all_zonelists(NULL, NULL); mutex_unlock(&zonelists_mutex); + mem_hotplug_done(); } } out: diff --git a/mm/page_io.c b/mm/page_io.c index b6c4ac388209..5f61b54ee1f3 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -22,6 +22,7 @@ #include <linux/frontswap.h> #include <linux/blkdev.h> #include <linux/uio.h> +#include <linux/sched/task.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -136,6 +137,7 @@ out: WRITE_ONCE(bio->bi_private, NULL); bio_put(bio); wake_up_process(waiter); + put_task_struct(waiter); } int generic_swapfile_activate(struct swap_info_struct *sis, @@ -378,6 +380,11 @@ int swap_readpage(struct page *page, bool do_poll) goto out; } bdev = bio->bi_bdev; + /* + * Keep this task valid during swap readpage because the oom killer may + * attempt to access it in the page fault retry time check. + */ + get_task_struct(current); bio->bi_private = current; bio_set_op_attrs(bio, REQ_OP_READ, 0); count_vm_event(PSWPIN); diff --git a/mm/rmap.c b/mm/rmap.c index ced14f1af6dc..c8993c63eb25 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -605,6 +605,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) tlb_ubc->flush_required = true; /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. @@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but do not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page. If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and mumap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 013eea76685e..308acb9d814b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2453,7 +2453,6 @@ void zs_destroy_pool(struct zs_pool *pool) } destroy_cache(pool); - kfree(pool->size_class); kfree(pool->name); kfree(pool); } |