diff options
author | Ingo Molnar <mingo@kernel.org> | 2019-12-25 10:41:37 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-12-25 10:41:37 +0100 |
commit | 1e5f8a308551b9816588e12bb795aeadebe37c4a (patch) | |
tree | bd71fc796fed24a3b7cc99df4a1d1bdaecc2b387 /mm | |
parent | stop_machine: remove try_stop_cpus helper (diff) | |
parent | Linux 5.5-rc3 (diff) | |
download | linux-1e5f8a308551b9816588e12bb795aeadebe37c4a.tar.xz linux-1e5f8a308551b9816588e12bb795aeadebe37c4a.zip |
Merge tag 'v5.5-rc3' into sched/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/kasan/common.c | 37 | ||||
-rw-r--r-- | mm/ksm.c | 1 | ||||
-rw-r--r-- | mm/memcontrol.c | 52 | ||||
-rw-r--r-- | mm/memory.c | 146 | ||||
-rw-r--r-- | mm/slab_common.c | 12 | ||||
-rw-r--r-- | mm/vmalloc.c | 129 | ||||
-rw-r--r-- | mm/vmscan.c | 2 | ||||
-rw-r--r-- | mm/vmstat.c | 60 |
8 files changed, 257 insertions, 182 deletions
diff --git a/mm/kasan/common.c b/mm/kasan/common.c index df3371d5c572..c15d8ae68c96 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,6 +36,7 @@ #include <linux/bug.h> #include <linux/uaccess.h> +#include <asm/cacheflush.h> #include <asm/tlbflush.h> #include "kasan.h" @@ -777,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, return 0; } -int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) +int kasan_populate_vmalloc(unsigned long addr, unsigned long size) { unsigned long shadow_start, shadow_end; int ret; - shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr); + if (!is_vmalloc_or_module_addr((void *)addr)) + return 0; + + shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr); shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr + - area->size); + shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size); shadow_end = ALIGN(shadow_end, PAGE_SIZE); ret = apply_to_page_range(&init_mm, shadow_start, @@ -796,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) flush_cache_vmap(shadow_start, shadow_end); - kasan_unpoison_shadow(area->addr, requested_size); - - area->flags |= VM_KASAN; - /* * We need to be careful about inter-cpu effects here. Consider: * @@ -842,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) * Poison the shadow for a vmalloc region. Called as part of the * freeing process at the time the region is freed. */ -void kasan_poison_vmalloc(void *start, unsigned long size) +void kasan_poison_vmalloc(const void *start, unsigned long size) { + if (!is_vmalloc_or_module_addr(start)) + return; + size = round_up(size, KASAN_SHADOW_SCALE_SIZE); kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID); } +void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + kasan_unpoison_shadow(start, size); +} + static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { @@ -947,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, { void *shadow_start, *shadow_end; unsigned long region_start, region_end; + unsigned long size; region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); @@ -969,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, shadow_end = kasan_mem_to_shadow((void *)region_end); if (shadow_end > shadow_start) { - apply_to_page_range(&init_mm, (unsigned long)shadow_start, - (unsigned long)(shadow_end - shadow_start), - kasan_depopulate_vmalloc_pte, NULL); + size = shadow_end - shadow_start; + apply_to_existing_page_range(&init_mm, + (unsigned long)shadow_start, + size, kasan_depopulate_vmalloc_pte, + NULL); flush_tlb_kernel_range((unsigned long)shadow_start, (unsigned long)shadow_end); } @@ -2478,6 +2478,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } +EXPORT_SYMBOL_GPL(ksm_madvise); int __ksm_enter(struct mm_struct *mm) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc01423277c5..c5b5f74cfd4d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -98,14 +98,6 @@ static bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account; } -static const char *const mem_cgroup_lru_names[] = { - "inactive_anon", - "active_anon", - "inactive_file", - "active_file", - "unevictable", -}; - #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 @@ -1421,7 +1413,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) PAGE_SIZE); for (i = 0; i < NR_LRU_LISTS; i++) - seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i], + seq_buf_printf(&s, "%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -1434,8 +1426,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg) /* Accumulated memory events */ - seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT), + memcg_events(memcg, PGFAULT)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), + memcg_events(memcg, PGMAJFAULT)); seq_buf_printf(&s, "workingset_refault %lu\n", memcg_page_state(memcg, WORKINGSET_REFAULT)); @@ -1444,22 +1438,27 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "workingset_nodereclaim %lu\n", memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); - seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL), + memcg_events(memcg, PGREFILL)); seq_buf_printf(&s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); seq_buf_printf(&s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE), + memcg_events(memcg, PGACTIVATE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE), + memcg_events(memcg, PGDEACTIVATE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE), + memcg_events(memcg, PGLAZYFREE)); + seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED), + memcg_events(memcg, PGLAZYFREED)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "thp_fault_alloc %lu\n", + seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "thp_collapse_alloc %lu\n", + seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), memcg_events(memcg, THP_COLLAPSE_ALLOC)); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -3742,13 +3741,6 @@ static const unsigned int memcg1_events[] = { PGMAJFAULT, }; -static const char *const memcg1_event_names[] = { - "pgpgin", - "pgpgout", - "pgfault", - "pgmajfault", -}; - static int memcg_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); @@ -3757,7 +3749,6 @@ static int memcg_stat_show(struct seq_file *m, void *v) unsigned int i; BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); - BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) @@ -3768,11 +3759,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "%s %lu\n", memcg1_event_names[i], + seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), memcg_events_local(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], + seq_printf(m, "%s %lu\n", lru_list_name(i), memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -3797,11 +3788,12 @@ static int memcg_stat_show(struct seq_file *m, void *v) } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], + seq_printf(m, "total_%s %llu\n", + vm_event_name(memcg1_events[i]), (u64)memcg_events(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], + seq_printf(m, "total_%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE); diff --git a/mm/memory.c b/mm/memory.c index d56883c220f4..1c4be871a237 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -672,7 +672,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, if (pmd_devmap(pmd)) return NULL; - if (is_zero_pfn(pfn)) + if (is_huge_zero_pmd(pmd)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) return NULL; @@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? + pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + } BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); do { - err = fn(pte++, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; } -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. - */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, + void *data, bool create) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; - int err; + int err = 0; if (WARN_ON(addr >= end)) return -EINVAL; @@ -2130,16 +2156,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!create && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break; } while (pgd++, addr = next, addr != end); return err; } + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, true); +} EXPORT_SYMBOL_GPL(apply_to_page_range); /* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, false); +} +EXPORT_SYMBOL_GPL(apply_to_existing_page_range); + +/* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched @@ -4197,19 +4249,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ ptl = pud_lock(mm, pud); -#ifndef __ARCH_HAS_4LEVEL_HACK if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); pud_populate(mm, pud, new); } else /* Another has populated it */ pmd_free(mm, new); -#else - if (!pgd_present(*pud)) { - mm_inc_nr_pmds(mm); - pgd_populate(mm, pud, new); - } else /* Another has populated it */ - pmd_free(mm, new); -#endif /* __ARCH_HAS_4LEVEL_HACK */ spin_unlock(ptl); return 0; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 8afa188f6e20..f0ab6d4ceb4c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct kmem_cache *s) * previous workitems on workqueue are processed. */ flush_workqueue(memcg_kmem_cache_wq); + + /* + * If we're racing with children kmem_cache deactivation, it might + * take another rcu grace period to complete their destruction. + * At this moment the corresponding percpu_ref_kill() call should be + * done, but it might take another rcu grace period to complete + * switching to the atomic mode. + * Please, note that we check without grabbing the slab_mutex. It's safe + * because at this moment the children list can't grow. + */ + if (!list_empty(&s->memcg_params.children)) + rcu_barrier(); } #else static inline int shutdown_memcg_caches(struct kmem_cache *s) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d3b3d60d893..e9681dc4aa75 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1062,6 +1062,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align, } /* + * Free a region of KVA allocated by alloc_vmap_area + */ +static void free_vmap_area(struct vmap_area *va) +{ + /* + * Remove from the busy tree/list. + */ + spin_lock(&vmap_area_lock); + unlink_va(va, &vmap_area_root); + spin_unlock(&vmap_area_lock); + + /* + * Insert/Merge it back to the free tree/list. + */ + spin_lock(&free_vmap_area_lock); + merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list); + spin_unlock(&free_vmap_area_lock); +} + +/* * Allocate a region of KVA of the specified size and alignment, within the * vstart and vend. */ @@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, struct vmap_area *va, *pva; unsigned long addr; int purged = 0; + int ret; BUG_ON(!size); BUG_ON(offset_in_page(size)); @@ -1139,6 +1160,7 @@ retry: va->va_end = addr + size; va->vm = NULL; + spin_lock(&vmap_area_lock); insert_vmap_area(va, &vmap_area_root, &vmap_area_list); spin_unlock(&vmap_area_lock); @@ -1147,6 +1169,12 @@ retry: BUG_ON(va->va_start < vstart); BUG_ON(va->va_end > vend); + ret = kasan_populate_vmalloc(addr, size); + if (ret) { + free_vmap_area(va); + return ERR_PTR(ret); + } + return va; overflow: @@ -1186,26 +1214,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); /* - * Free a region of KVA allocated by alloc_vmap_area - */ -static void free_vmap_area(struct vmap_area *va) -{ - /* - * Remove from the busy tree/list. - */ - spin_lock(&vmap_area_lock); - unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); - - /* - * Insert/Merge it back to the free tree/list. - */ - spin_lock(&free_vmap_area_lock); - merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list); - spin_unlock(&free_vmap_area_lock); -} - -/* * Clear the pagetable entries of a given vmap_area */ static void unmap_vmap_area(struct vmap_area *va) @@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr > VMALLOC_END); BUG_ON(!PAGE_ALIGNED(addr)); + kasan_poison_vmalloc(mem, size); + if (likely(count <= VMAP_MAX_ALLOC)) { debug_check_no_locks_freed(mem, size); vb_free(mem, size); @@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro addr = va->va_start; mem = (void *)addr; } + + kasan_unpoison_vmalloc(mem, size); + if (vmap_page_range(addr, addr + size, prot, pages) < 0) { vm_unmap_ram(mem, count); return NULL; @@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, { struct vmap_area *va; struct vm_struct *area; + unsigned long requested_size = size; BUG_ON(in_interrupt()); size = PAGE_ALIGN(size); @@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - setup_vmalloc_vm(area, va, flags, caller); + kasan_unpoison_vmalloc((void *)va->va_start, requested_size); - /* - * For KASAN, if we are in vmalloc space, we need to cover the shadow - * area with real memory. If we come here through VM_ALLOC, this is - * done by a higher level function that has access to the true size, - * which might not be a full page. - * - * We assume module space comes via VM_ALLOC path. - */ - if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) { - if (kasan_populate_vmalloc(area->size, area)) { - unmap_vmap_area(va); - kfree(area); - return NULL; - } - } + setup_vmalloc_vm(area, va, flags, caller); return area; } @@ -2293,8 +2293,7 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); - if (area->flags & VM_KASAN) - kasan_poison_vmalloc(area->addr, area->size); + kasan_poison_vmalloc(area->addr, area->size); vm_remove_mappings(area, deallocate_pages); @@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages()) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | + area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED | vm_flags, start, end, node, gfp_mask, caller); if (!area) goto fail; @@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!addr) return NULL; - if (is_vmalloc_or_module_addr(area->addr)) { - if (kasan_populate_vmalloc(real_size, area)) - return NULL; - } - /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. @@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, struct vmap_area **vas, *va; struct vm_struct **vms; int area, area2, last_area, term_area; - unsigned long base, start, size, end, last_end; + unsigned long base, start, size, end, last_end, orig_start, orig_end; bool purged = false; enum fit_type type; @@ -3424,6 +3418,15 @@ retry: spin_unlock(&free_vmap_area_lock); + /* populate the kasan shadow space */ + for (area = 0; area < nr_vms; area++) { + if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) + goto err_free_shadow; + + kasan_unpoison_vmalloc((void *)vas[area]->va_start, + sizes[area]); + } + /* insert all vm's */ spin_lock(&vmap_area_lock); for (area = 0; area < nr_vms; area++) { @@ -3434,12 +3437,6 @@ retry: } spin_unlock(&vmap_area_lock); - /* populate the shadow space outside of the lock */ - for (area = 0; area < nr_vms; area++) { - /* assume success here */ - kasan_populate_vmalloc(sizes[area], vms[area]); - } - kfree(vas); return vms; @@ -3451,8 +3448,12 @@ recovery: * and when pcpu_get_vm_areas() is success. */ while (area--) { - merge_or_add_vmap_area(vas[area], &free_vmap_area_root, - &free_vmap_area_list); + orig_start = vas[area]->va_start; + orig_end = vas[area]->va_end; + va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root, + &free_vmap_area_list); + kasan_release_vmalloc(orig_start, orig_end, + va->va_start, va->va_end); vas[area] = NULL; } @@ -3487,6 +3488,28 @@ err_free2: kfree(vas); kfree(vms); return NULL; + +err_free_shadow: + spin_lock(&free_vmap_area_lock); + /* + * We release all the vmalloc shadows, even the ones for regions that + * hadn't been successfully added. This relies on kasan_release_vmalloc + * being able to tolerate this case. + */ + for (area = 0; area < nr_vms; area++) { + orig_start = vas[area]->va_start; + orig_end = vas[area]->va_end; + va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root, + &free_vmap_area_list); + kasan_release_vmalloc(orig_start, orig_end, + va->va_start, va->va_end); + vas[area] = NULL; + kfree(vms[area]); + } + spin_unlock(&free_vmap_area_lock); + kfree(vas); + kfree(vms); + return NULL; } /** diff --git a/mm/vmscan.c b/mm/vmscan.c index 74e8edce83ca..572fb17c6273 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker) { down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG if (shrinker->flags & SHRINKER_MEMCG_AWARE) idr_replace(&shrinker_idr, shrinker, shrinker->id); #endif diff --git a/mm/vmstat.c b/mm/vmstat.c index a8222041bd44..78d53378db99 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1084,7 +1084,8 @@ int fragmentation_index(struct zone *zone, unsigned int order) } #endif -#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ + defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -1134,7 +1135,7 @@ const char * const vmstat_text[] = { "numa_other", #endif - /* Node-based counters */ + /* enum node_stat_item counters */ "nr_inactive_anon", "nr_active_anon", "nr_inactive_file", @@ -1172,7 +1173,7 @@ const char * const vmstat_text[] = { "nr_dirty_threshold", "nr_dirty_background_threshold", -#ifdef CONFIG_VM_EVENT_COUNTERS +#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ "pgpgin", "pgpgout", @@ -1291,9 +1292,9 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif -#endif /* CONFIG_VM_EVENTS_COUNTERS */ +#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; -#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ defined(CONFIG_PROC_FS) @@ -1564,10 +1565,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, if (is_zone_first_populated(pgdat, zone)) { seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { - seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS], - node_page_state(pgdat, i)); + seq_printf(m, "\n %-12s %lu", node_stat_name(i), + node_page_state(pgdat, i)); } } seq_printf(m, @@ -1600,14 +1599,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, } for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - seq_printf(m, "\n %-12s %lu", vmstat_text[i], - zone_page_state(zone, i)); + seq_printf(m, "\n %-12s %lu", zone_stat_name(i), + zone_page_state(zone, i)); #ifdef CONFIG_NUMA for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) - seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], - zone_numa_state_snapshot(zone, i)); + seq_printf(m, "\n %-12s %lu", numa_stat_name(i), + zone_numa_state_snapshot(zone, i)); #endif seq_printf(m, "\n pagesets"); @@ -1658,31 +1656,23 @@ static const struct seq_operations zoneinfo_op = { .show = zoneinfo_show, }; -enum writeback_stat_item { - NR_DIRTY_THRESHOLD, - NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, -}; +#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ + NR_VM_NUMA_STAT_ITEMS + \ + NR_VM_NODE_STAT_ITEMS + \ + NR_VM_WRITEBACK_STAT_ITEMS + \ + (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ + NR_VM_EVENT_ITEMS : 0)) static void *vmstat_start(struct seq_file *m, loff_t *pos) { unsigned long *v; - int i, stat_items_size; + int i; - if (*pos >= ARRAY_SIZE(vmstat_text)) + if (*pos >= NR_VMSTAT_ITEMS) return NULL; - stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + - NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); - -#ifdef CONFIG_VM_EVENT_COUNTERS - stat_items_size += sizeof(struct vm_event_state); -#endif - BUILD_BUG_ON(stat_items_size != - ARRAY_SIZE(vmstat_text) * sizeof(unsigned long)); - v = kmalloc(stat_items_size, GFP_KERNEL); + BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); + v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); m->private = v; if (!v) return ERR_PTR(-ENOMEM); @@ -1715,7 +1705,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) { (*pos)++; - if (*pos >= ARRAY_SIZE(vmstat_text)) + if (*pos >= NR_VMSTAT_ITEMS) return NULL; return (unsigned long *)m->private + *pos; } @@ -1781,7 +1771,7 @@ int vmstat_refresh(struct ctl_table *table, int write, val = atomic_long_read(&vm_zone_stat[i]); if (val < 0) { pr_warn("%s: %s %ld\n", - __func__, vmstat_text[i], val); + __func__, zone_stat_name(i), val); err = -EINVAL; } } @@ -1790,7 +1780,7 @@ int vmstat_refresh(struct ctl_table *table, int write, val = atomic_long_read(&vm_numa_stat[i]); if (val < 0) { pr_warn("%s: %s %ld\n", - __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val); + __func__, numa_stat_name(i), val); err = -EINVAL; } } |