Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c    |  19
-rw-r--r--  mm/cma.c            |  16
-rw-r--r--  mm/gup.c            |  14
-rw-r--r--  mm/hugetlb.c        | 123
-rw-r--r--  mm/kasan/Makefile   |  23
-rw-r--r--  mm/kasan/generic.c  |   1
-rw-r--r--  mm/kasan/kasan.h    |  34
-rw-r--r--  mm/kasan/tags.c     |   1
-rw-r--r--  mm/ksm.c            |  12
-rw-r--r--  mm/madvise.c        |  18
-rw-r--r--  mm/memblock.c       |   2
-rw-r--r--  mm/memcontrol.c     |  18
-rw-r--r--  mm/memory.c         | 168
-rw-r--r--  mm/memory_hotplug.c |  11
-rw-r--r--  mm/memremap.c       |  17
-rw-r--r--  mm/mmap.c           |   4
-rw-r--r--  mm/mprotect.c       |   4
-rw-r--r--  mm/mremap.c         |  15
-rw-r--r--  mm/page_alloc.c     |  14
-rw-r--r--  mm/percpu.c         |  14
-rw-r--r--  mm/shmem.c          |  13
-rw-r--r--  mm/slab_common.c    |   2
-rw-r--r--  mm/slub.c           |  57
-rw-r--r--  mm/vmalloc.c        |  16
-rw-r--r--  mm/vmscan.c         |   1
-rw-r--r--  mm/z3fold.c         |  11
26 files changed, 511 insertions, 117 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 62f05f605fb5..efc5b83acd2d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
-const char *bdi_unknown_name = "(unknown)";
+static const char *bdi_unknown_name = "(unknown)";
/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
@@ -491,8 +491,8 @@ static void cgwb_release_workfn(struct work_struct *work)
css_put(wb->blkcg_css);
mutex_unlock(&wb->bdi->cgwb_release_mutex);
- /* triggers blkg destruction if cgwb_refcnt becomes zero */
- blkcg_cgwb_put(blkcg);
+ /* triggers blkg destruction if no online users left */
+ blkcg_unpin_online(blkcg);
fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
@@ -592,7 +592,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
- blkcg_cgwb_get(blkcg);
+ blkcg_pin_online(blkcg);
css_get(memcg_css);
css_get(blkcg_css);
}
@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
if (bdi->dev) /* The driver needs to use separate queues per device */
return 0;
- dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
+ vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
+ dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
if (IS_ERR(dev))
return PTR_ERR(dev);
@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL(bdi_put);
+const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return bdi->dev_name;
+}
+EXPORT_SYMBOL_GPL(bdi_dev_name);
+
static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
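The new bdi_dev_name() helper gives callers one NULL-safe way to name a bdi instead of open-coding the dev check. A minimal illustrative caller, not part of the diff (the function name is hypothetical):

	/* Illustrative only: bdi_dev_name() falls back to "(unknown)"
	 * when the bdi is NULL or not yet registered. */
	static void example_report_bdi(struct backing_dev_info *bdi)
	{
		pr_info("writeback device: %s\n", bdi_dev_name(bdi));
	}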
diff --git a/mm/cma.c b/mm/cma.c
index be55d1988c67..0463ad2ce06b 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -220,7 +220,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
}
/**
- * cma_declare_contiguous() - reserve custom contiguous area
+ * cma_declare_contiguous_nid() - reserve custom contiguous area
* @base: Base address of the reserved area optional, use 0 for any
* @size: Size of the reserved area (in bytes),
* @limit: End address of the reserved memory (optional, 0 for any).
@@ -229,6 +229,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
* @fixed: hint about where to place the reserved area
* @name: The name of the area. See function cma_init_reserved_mem()
* @res_cma: Pointer to store the created cma region.
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
*
* This function reserves memory from early allocator. It should be
* called by arch specific code once the early allocator (memblock or bootmem)
@@ -238,10 +239,11 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
* If @fixed is true, reserve contiguous area at exactly @base. If false,
* reserve in range from @base to @limit.
*/
-int __init cma_declare_contiguous(phys_addr_t base,
+int __init cma_declare_contiguous_nid(phys_addr_t base,
phys_addr_t size, phys_addr_t limit,
phys_addr_t alignment, unsigned int order_per_bit,
- bool fixed, const char *name, struct cma **res_cma)
+ bool fixed, const char *name, struct cma **res_cma,
+ int nid)
{
phys_addr_t memblock_end = memblock_end_of_DRAM();
phys_addr_t highmem_start;
@@ -336,14 +338,14 @@ int __init cma_declare_contiguous(phys_addr_t base,
* memory in case of failure.
*/
if (base < highmem_start && limit > highmem_start) {
- addr = memblock_phys_alloc_range(size, alignment,
- highmem_start, limit);
+ addr = memblock_alloc_range_nid(size, alignment,
+ highmem_start, limit, nid, false);
limit = highmem_start;
}
if (!addr) {
- addr = memblock_phys_alloc_range(size, alignment, base,
- limit);
+ addr = memblock_alloc_range_nid(size, alignment, base,
+ limit, nid, false);
if (!addr) {
ret = -ENOMEM;
goto err;
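The renamed cma_declare_contiguous_nid() only appends the nid argument; node-agnostic callers can pass NUMA_NO_NODE. A hedged sketch of such a call, with an illustrative area name and size:

	static struct cma *example_cma;	/* illustrative only */

	static int __init example_cma_reserve(void)
	{
		/* base = 0 and limit = 0 let memblock pick any range;
		 * NUMA_NO_NODE keeps the old no-preference behaviour. */
		return cma_declare_contiguous_nid(0, SZ_64M, 0, 0, 0, false,
						  "example", &example_cma,
						  NUMA_NO_NODE);
	}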
diff --git a/mm/gup.c b/mm/gup.c
index 6076df8e04a4..87a6a59fe667 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1088,7 +1088,7 @@ retry:
* potentially allocating memory.
*/
if (fatal_signal_pending(current)) {
- ret = -ERESTARTSYS;
+ ret = -EINTR;
goto out;
}
cond_resched();
@@ -1218,6 +1218,10 @@ retry:
if (!vma_permits_fault(vma, fault_flags))
return -EFAULT;
+ if ((fault_flags & FAULT_FLAG_KILLABLE) &&
+ fatal_signal_pending(current))
+ return -EINTR;
+
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
@@ -1230,11 +1234,9 @@ retry:
if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem);
- if (!(fault_flags & FAULT_FLAG_TRIED)) {
- *unlocked = true;
- fault_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ *unlocked = true;
+ fault_flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
if (tsk) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f5fb53fdfa02..bcabbe02192b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -28,6 +28,7 @@
#include <linux/jhash.h>
#include <linux/numa.h>
#include <linux/llist.h>
+#include <linux/cma.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -44,6 +45,9 @@
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];
+
+static struct cma *hugetlb_cma[MAX_NUMNODES];
+
/*
* Minimum page order among possible hugepage sizes, set to a proper value
* at boot time.
@@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page,
static void free_gigantic_page(struct page *page, unsigned int order)
{
+ /*
+ * If the page isn't allocated using the cma allocator,
+ * cma_release() returns false.
+ */
+ if (IS_ENABLED(CONFIG_CMA) &&
+ cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+ return;
+
free_contig_range(page_to_pfn(page), 1 << order);
}
@@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
{
unsigned long nr_pages = 1UL << huge_page_order(h);
+ if (IS_ENABLED(CONFIG_CMA)) {
+ struct page *page;
+ int node;
+
+ for_each_node_mask(node, *nodemask) {
+ if (!hugetlb_cma[node])
+ continue;
+
+ page = cma_alloc(hugetlb_cma[node], nr_pages,
+ huge_page_order(h), true);
+ if (page)
+ return page;
+ }
+ }
+
return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
}
@@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page)
set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
set_page_refcounted(page);
if (hstate_is_gigantic(h)) {
+ /*
+ * Temporarily drop the hugetlb_lock, because
+ * we might block in free_gigantic_page().
+ */
+ spin_unlock(&hugetlb_lock);
destroy_compound_gigantic_page(page, huge_page_order(h));
free_gigantic_page(page, huge_page_order(h));
+ spin_lock(&hugetlb_lock);
} else {
__free_pages(page, huge_page_order(h));
}
@@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
for (i = 0; i < h->max_huge_pages; ++i) {
if (hstate_is_gigantic(h)) {
+ if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+ pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
+ break;
+ }
if (!alloc_bootmem_huge_page(h))
break;
} else if (!alloc_pool_huge_page(h,
@@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void)
default_hstate.max_huge_pages = default_hstate_max_huge_pages;
}
+ hugetlb_cma_check();
hugetlb_init_hstates();
gather_bootmem_prealloc();
report_hugepages();
@@ -5327,8 +5365,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
{
pgd_t *pgd;
p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
+ pud_t *pud, pud_entry;
+ pmd_t *pmd, pmd_entry;
pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd))
@@ -5338,17 +5376,19 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
pud = pud_offset(p4d, addr);
- if (sz != PUD_SIZE && pud_none(*pud))
+ pud_entry = READ_ONCE(*pud);
+ if (sz != PUD_SIZE && pud_none(pud_entry))
return NULL;
/* hugepage or swap? */
- if (pud_huge(*pud) || !pud_present(*pud))
+ if (pud_huge(pud_entry) || !pud_present(pud_entry))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
- if (sz != PMD_SIZE && pmd_none(*pmd))
+ pmd_entry = READ_ONCE(*pmd);
+ if (sz != PMD_SIZE && pmd_none(pmd_entry))
return NULL;
/* hugepage or swap? */
- if (pmd_huge(*pmd) || !pmd_present(*pmd))
+ if (pmd_huge(pmd_entry) || !pmd_present(pmd_entry))
return (pte_t *)pmd;
return NULL;
@@ -5506,3 +5546,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
spin_unlock(&hugetlb_lock);
}
}
+
+#ifdef CONFIG_CMA
+static unsigned long hugetlb_cma_size __initdata;
+static bool cma_reserve_called __initdata;
+
+static int __init cmdline_parse_hugetlb_cma(char *p)
+{
+ hugetlb_cma_size = memparse(p, &p);
+ return 0;
+}
+
+early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
+
+void __init hugetlb_cma_reserve(int order)
+{
+ unsigned long size, reserved, per_node;
+ int nid;
+
+ cma_reserve_called = true;
+
+ if (!hugetlb_cma_size)
+ return;
+
+ if (hugetlb_cma_size < (PAGE_SIZE << order)) {
+ pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n",
+ (PAGE_SIZE << order) / SZ_1M);
+ return;
+ }
+
+ /*
+ * If 3 GB area is requested on a machine with 4 numa nodes,
+ * let's allocate 1 GB on first three nodes and ignore the last one.
+ */
+ per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+ pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+ hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+
+ reserved = 0;
+ for_each_node_state(nid, N_ONLINE) {
+ int res;
+
+ size = min(per_node, hugetlb_cma_size - reserved);
+ size = round_up(size, PAGE_SIZE << order);
+
+ res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order,
+ 0, false, "hugetlb",
+ &hugetlb_cma[nid], nid);
+ if (res) {
+ pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
+ res, nid);
+ continue;
+ }
+
+ reserved += size;
+ pr_info("hugetlb_cma: reserved %lu MiB on node %d\n",
+ size / SZ_1M, nid);
+
+ if (reserved >= hugetlb_cma_size)
+ break;
+ }
+}
+
+void __init hugetlb_cma_check(void)
+{
+ if (!hugetlb_cma_size || cma_reserve_called)
+ return;
+
+ pr_warn("hugetlb_cma: the option isn't supported by current arch\n");
+}
+
+#endif /* CONFIG_CMA */
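Worked example of the split done in hugetlb_cma_reserve(), assuming 1 GiB gigantic pages (PAGE_SIZE << order == 1 GiB) and hugetlb_cma=3G on a machine with four online nodes:

	per_node = DIV_ROUND_UP(3G, 4) = 768M
	node 0: min(768M, 3G - 0G) = 768M, rounded up to 1G -> reserve 1G
	node 1: min(768M, 3G - 1G) = 768M, rounded up to 1G -> reserve 1G
	node 2: min(768M, 3G - 2G) = 768M, rounded up to 1G -> reserve 1G
	reserved (3G) >= hugetlb_cma_size (3G), so node 3 gets nothing

which matches the "3 GB area on 4 numa nodes" comment in the hunk above.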
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 08b43de2383b..de3121848ddf 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -1,23 +1,28 @@
# SPDX-License-Identifier: GPL-2.0
KASAN_SANITIZE := n
-UBSAN_SANITIZE_common.o := n
-UBSAN_SANITIZE_generic.o := n
-UBSAN_SANITIZE_generic_report.o := n
-UBSAN_SANITIZE_tags.o := n
+UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# Disable ftrace to avoid recursion.
CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE)
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
-
-CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
-CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
-CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
-CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_init.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_quarantine.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CFLAGS_tags_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
obj-$(CONFIG_KASAN) := common.o init.o report.o
obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 56ff8885fe2e..098a7dbaced6 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -15,7 +15,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#define DISABLE_BRANCH_PROFILING
#include <linux/export.h>
#include <linux/interrupt.h>
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index e8f37199d885..cfade6413528 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -212,8 +212,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
void __asan_register_globals(struct kasan_global *globals, size_t size);
void __asan_unregister_globals(struct kasan_global *globals, size_t size);
-void __asan_loadN(unsigned long addr, size_t size);
-void __asan_storeN(unsigned long addr, size_t size);
void __asan_handle_no_return(void);
void __asan_alloca_poison(unsigned long addr, size_t size);
void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom);
@@ -228,6 +226,8 @@ void __asan_load8(unsigned long addr);
void __asan_store8(unsigned long addr);
void __asan_load16(unsigned long addr);
void __asan_store16(unsigned long addr);
+void __asan_loadN(unsigned long addr, size_t size);
+void __asan_storeN(unsigned long addr, size_t size);
void __asan_load1_noabort(unsigned long addr);
void __asan_store1_noabort(unsigned long addr);
@@ -239,6 +239,21 @@ void __asan_load8_noabort(unsigned long addr);
void __asan_store8_noabort(unsigned long addr);
void __asan_load16_noabort(unsigned long addr);
void __asan_store16_noabort(unsigned long addr);
+void __asan_loadN_noabort(unsigned long addr, size_t size);
+void __asan_storeN_noabort(unsigned long addr, size_t size);
+
+void __asan_report_load1_noabort(unsigned long addr);
+void __asan_report_store1_noabort(unsigned long addr);
+void __asan_report_load2_noabort(unsigned long addr);
+void __asan_report_store2_noabort(unsigned long addr);
+void __asan_report_load4_noabort(unsigned long addr);
+void __asan_report_store4_noabort(unsigned long addr);
+void __asan_report_load8_noabort(unsigned long addr);
+void __asan_report_store8_noabort(unsigned long addr);
+void __asan_report_load16_noabort(unsigned long addr);
+void __asan_report_store16_noabort(unsigned long addr);
+void __asan_report_load_n_noabort(unsigned long addr, size_t size);
+void __asan_report_store_n_noabort(unsigned long addr, size_t size);
void __asan_set_shadow_00(const void *addr, size_t size);
void __asan_set_shadow_f1(const void *addr, size_t size);
@@ -247,4 +262,19 @@ void __asan_set_shadow_f3(const void *addr, size_t size);
void __asan_set_shadow_f5(const void *addr, size_t size);
void __asan_set_shadow_f8(const void *addr, size_t size);
+void __hwasan_load1_noabort(unsigned long addr);
+void __hwasan_store1_noabort(unsigned long addr);
+void __hwasan_load2_noabort(unsigned long addr);
+void __hwasan_store2_noabort(unsigned long addr);
+void __hwasan_load4_noabort(unsigned long addr);
+void __hwasan_store4_noabort(unsigned long addr);
+void __hwasan_load8_noabort(unsigned long addr);
+void __hwasan_store8_noabort(unsigned long addr);
+void __hwasan_load16_noabort(unsigned long addr);
+void __hwasan_store16_noabort(unsigned long addr);
+void __hwasan_loadN_noabort(unsigned long addr, size_t size);
+void __hwasan_storeN_noabort(unsigned long addr, size_t size);
+
+void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size);
+
#endif
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 25b7734e7013..8a959fdd30e3 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -12,7 +12,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#define DISABLE_BRANCH_PROFILING
#include <linux/export.h>
#include <linux/interrupt.h>
diff --git a/mm/ksm.c b/mm/ksm.c
index a558da9e7177..281c00129a2e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2112,8 +2112,16 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
down_read(&mm->mmap_sem);
vma = find_mergeable_vma(mm, rmap_item->address);
- err = try_to_merge_one_page(vma, page,
- ZERO_PAGE(rmap_item->address));
+ if (vma) {
+ err = try_to_merge_one_page(vma, page,
+ ZERO_PAGE(rmap_item->address));
+ } else {
+ /*
+ * If the vma is out of date, we do not need to
+ * continue.
+ */
+ err = 0;
+ }
up_read(&mm->mmap_sem);
/*
* In case of failure, the page was not really empty, so we
diff --git a/mm/madvise.c b/mm/madvise.c
index 4bb30ed6c8d2..8cbd8c1bfe15 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -27,6 +27,7 @@
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/mmu_notifier.h>
+#include <linux/sched/mm.h>
#include <asm/tlb.h>
@@ -1090,6 +1091,23 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
if (write) {
if (down_write_killable(&current->mm->mmap_sem))
return -EINTR;
+
+ /*
+ * We may have stolen the mm from another process
+ * that is undergoing core dumping.
+ *
+ * Right now that's io_ring, in the future it may
+ * be remote process management and not "current"
+ * at all.
+ *
+ * We need to fix core dumping to not do this,
+ * but for now we have the mmget_still_valid()
+ * model.
+ */
+ if (!mmget_still_valid(current->mm)) {
+ up_write(&current->mm->mmap_sem);
+ return -EINTR;
+ }
} else {
down_read(&current->mm->mmap_sem);
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 4d06bbaded0f..c79ba6f9920c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1349,7 +1349,7 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
* Return:
* Physical address of allocated memory block on success, %0 on failure.
*/
-static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
+phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid,
bool exact_nid)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 05b4ec2c6499..a3b97f103966 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2336,6 +2336,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
usage = page_counter_read(&memcg->memory);
high = READ_ONCE(memcg->high);
+ if (usage <= high)
+ continue;
+
/*
* Prevent division by 0 in overage calculation by acting as if
* it was a threshold of 1 page
@@ -4987,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
unsigned int size;
int node;
int __maybe_unused i;
+ long error = -ENOMEM;
size = sizeof(struct mem_cgroup);
size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
memcg = kzalloc(size, GFP_KERNEL);
if (!memcg)
- return NULL;
+ return ERR_PTR(error);
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
1, MEM_CGROUP_ID_MAX,
GFP_KERNEL);
- if (memcg->id.id < 0)
+ if (memcg->id.id < 0) {
+ error = memcg->id.id;
goto fail;
+ }
memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
if (!memcg->vmstats_local)
@@ -5043,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
fail:
mem_cgroup_id_remove(memcg);
__mem_cgroup_free(memcg);
- return NULL;
+ return ERR_PTR(error);
}
static struct cgroup_subsys_state * __ref
@@ -5054,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
long error = -ENOMEM;
memcg = mem_cgroup_alloc();
- if (!memcg)
- return ERR_PTR(error);
+ if (IS_ERR(memcg))
+ return ERR_CAST(memcg);
WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5105,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
fail:
mem_cgroup_id_remove(memcg);
mem_cgroup_free(memcg);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(error);
}
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
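mem_cgroup_alloc() now reports failures through the ERR_PTR convention so mem_cgroup_css_alloc() can propagate the real errno (for example what idr_alloc() returned) instead of a blanket -ENOMEM. A generic sketch of the idiom with illustrative names:

	struct foo { int val; };		/* illustrative only */

	static struct foo *foo_alloc(void)
	{
		struct foo *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);	/* encode errno in the pointer */
		return p;
	}

	static int foo_create(void)
	{
		struct foo *p = foo_alloc();

		if (IS_ERR(p))
			return PTR_ERR(p);	/* propagate the real errno */
		return 0;
	}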
diff --git a/mm/memory.c b/mm/memory.c
index 19874d133a66..f703fe8c8346 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
-pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
- spinlock_t **ptl)
+static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -1439,9 +1438,40 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
return NULL;
VM_BUG_ON(pmd_trans_huge(*pmd));
+ return pmd;
+}
+
+pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
+ spinlock_t **ptl)
+{
+ pmd_t *pmd = walk_to_pmd(mm, addr);
+
+ if (!pmd)
+ return NULL;
return pte_alloc_map_lock(mm, pmd, addr, ptl);
}
+static int validate_page_before_insert(struct page *page)
+{
+ if (PageAnon(page) || PageSlab(page) || page_has_type(page))
+ return -EINVAL;
+ flush_dcache_page(page);
+ return 0;
+}
+
+static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
+ unsigned long addr, struct page *page, pgprot_t prot)
+{
+ if (!pte_none(*pte))
+ return -EBUSY;
+ /* Ok, finally just insert the thing.. */
+ get_page(page);
+ inc_mm_counter_fast(mm, mm_counter_file(page));
+ page_add_file_rmap(page, false);
+ set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ return 0;
+}
+
/*
* This is the old fallback for page remapping.
*
@@ -1457,31 +1487,135 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
pte_t *pte;
spinlock_t *ptl;
- retval = -EINVAL;
- if (PageAnon(page) || PageSlab(page) || page_has_type(page))
+ retval = validate_page_before_insert(page);
+ if (retval)
goto out;
retval = -ENOMEM;
- flush_dcache_page(page);
pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
goto out;
- retval = -EBUSY;
- if (!pte_none(*pte))
- goto out_unlock;
-
- /* Ok, finally just insert the thing.. */
- get_page(page);
- inc_mm_counter_fast(mm, mm_counter_file(page));
- page_add_file_rmap(page, false);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
-
- retval = 0;
-out_unlock:
+ retval = insert_page_into_pte_locked(mm, pte, addr, page, prot);
pte_unmap_unlock(pte, ptl);
out:
return retval;
}
+#ifdef pte_index
+static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, struct page *page, pgprot_t prot)
+{
+ int err;
+
+ if (!page_count(page))
+ return -EINVAL;
+ err = validate_page_before_insert(page);
+ return err ? err : insert_page_into_pte_locked(
+ mm, pte_offset_map(pmd, addr), addr, page, prot);
+}
+
+/* insert_pages() amortizes the cost of spinlock operations
+ * when inserting pages in a loop. Arch *must* define pte_index.
+ */
+static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
+ struct page **pages, unsigned long *num, pgprot_t prot)
+{
+ pmd_t *pmd = NULL;
+ spinlock_t *pte_lock = NULL;
+ struct mm_struct *const mm = vma->vm_mm;
+ unsigned long curr_page_idx = 0;
+ unsigned long remaining_pages_total = *num;
+ unsigned long pages_to_write_in_pmd;
+ int ret;
+more:
+ ret = -EFAULT;
+ pmd = walk_to_pmd(mm, addr);
+ if (!pmd)
+ goto out;
+
+ pages_to_write_in_pmd = min_t(unsigned long,
+ remaining_pages_total, PTRS_PER_PTE - pte_index(addr));
+
+ /* Allocate the PTE if necessary; takes PMD lock once only. */
+ ret = -ENOMEM;
+ if (pte_alloc(mm, pmd))
+ goto out;
+ pte_lock = pte_lockptr(mm, pmd);
+
+ while (pages_to_write_in_pmd) {
+ int pte_idx = 0;
+ const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
+
+ spin_lock(pte_lock);
+ for (; pte_idx < batch_size; ++pte_idx) {
+ int err = insert_page_in_batch_locked(mm, pmd,
+ addr, pages[curr_page_idx], prot);
+ if (unlikely(err)) {
+ spin_unlock(pte_lock);
+ ret = err;
+ remaining_pages_total -= pte_idx;
+ goto out;
+ }
+ addr += PAGE_SIZE;
+ ++curr_page_idx;
+ }
+ spin_unlock(pte_lock);
+ pages_to_write_in_pmd -= batch_size;
+ remaining_pages_total -= batch_size;
+ }
+ if (remaining_pages_total)
+ goto more;
+ ret = 0;
+out:
+ *num = remaining_pages_total;
+ return ret;
+}
+#endif /* ifdef pte_index */
+
+/**
+ * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
+ * @vma: user vma to map to
+ * @addr: target start user address of these pages
+ * @pages: source kernel pages
+ * @num: in: number of pages to map. out: number of pages that were *not*
+ * mapped. (0 means all pages were successfully mapped).
+ *
+ * Preferred over vm_insert_page() when inserting multiple pages.
+ *
+ * In case of error, we may have mapped a subset of the provided
+ * pages. It is the caller's responsibility to account for this case.
+ *
+ * The same restrictions apply as in vm_insert_page().
+ */
+int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
+ struct page **pages, unsigned long *num)
+{
+#ifdef pte_index
+ const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1;
+
+ if (addr < vma->vm_start || end_addr >= vma->vm_end)
+ return -EFAULT;
+ if (!(vma->vm_flags & VM_MIXEDMAP)) {
+ BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+ BUG_ON(vma->vm_flags & VM_PFNMAP);
+ vma->vm_flags |= VM_MIXEDMAP;
+ }
+ /* Defer page refcount checking till we're about to map that page. */
+ return insert_pages(vma, addr, pages, num, vma->vm_page_prot);
+#else
+ unsigned long idx = 0, pgcount = *num;
+ int err;
+
+ for (; idx < pgcount; ++idx) {
+ err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]);
+ if (err)
+ break;
+ }
+ *num = pgcount - idx;
+ return err;
+#endif /* ifdef pte_index */
+}
+EXPORT_SYMBOL(vm_insert_pages);
+
/**
* vm_insert_page - insert single page into user vma
* @vma: user vma to map to
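vm_insert_pages() batches what would otherwise be *num separate vm_insert_page() calls under one PTE lock per PMD. A hedged sketch of a driver-side caller (names are illustrative; the same vma restrictions as for vm_insert_page() apply):

	static int example_map_pages(struct vm_area_struct *vma,
				     struct page **pages, unsigned long nr)
	{
		unsigned long num = nr;	/* on return: pages NOT mapped */
		int err;

		err = vm_insert_pages(vma, vma->vm_start, pages, &num);
		if (err)
			pr_warn("example: %lu of %lu pages unmapped (err %d)\n",
				num, nr, err);
		return err;
	}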
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 635e8e286598..fc0aad0bc1f5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -304,12 +304,15 @@ static int check_hotplug_memory_addressable(unsigned long pfn,
* add the new pages.
*/
int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
- struct mhp_restrictions *restrictions)
+ struct mhp_params *params)
{
const unsigned long end_pfn = pfn + nr_pages;
unsigned long cur_nr_pages;
int err;
- struct vmem_altmap *altmap = restrictions->altmap;
+ struct vmem_altmap *altmap = params->altmap;
+
+ if (WARN_ON_ONCE(!params->pgprot.pgprot))
+ return -EINVAL;
err = check_hotplug_memory_addressable(pfn, nr_pages);
if (err)
@@ -1002,7 +1005,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
*/
int __ref add_memory_resource(int nid, struct resource *res)
{
- struct mhp_restrictions restrictions = {};
+ struct mhp_params params = { .pgprot = PAGE_KERNEL };
u64 start, size;
bool new_node = false;
int ret;
@@ -1030,7 +1033,7 @@ int __ref add_memory_resource(int nid, struct resource *res)
new_node = ret;
/* call arch's memory hotadd */
- ret = arch_add_memory(nid, start, size, &restrictions);
+ ret = arch_add_memory(nid, start, size, &params);
if (ret < 0)
goto error;
diff --git a/mm/memremap.c b/mm/memremap.c
index bbf457c4f166..03e38b7a38f1 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -184,13 +184,13 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
{
struct resource *res = &pgmap->res;
struct dev_pagemap *conflict_pgmap;
- struct mhp_restrictions restrictions = {
+ struct mhp_params params = {
/*
* We do not want any optional features only our own memmap
*/
.altmap = pgmap_altmap(pgmap),
+ .pgprot = PAGE_KERNEL,
};
- pgprot_t pgprot = PAGE_KERNEL;
int error, is_ram;
bool need_devmap_managed = true;
@@ -217,7 +217,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_DEVDAX:
+ need_devmap_managed = false;
+ break;
case MEMORY_DEVICE_PCI_P2PDMA:
+ params.pgprot = pgprot_noncached(params.pgprot);
need_devmap_managed = false;
break;
default:
@@ -282,8 +285,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
- resource_size(res));
+ error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(res->start),
+ 0, resource_size(res));
if (error)
goto err_pfn_remap;
@@ -302,7 +305,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
*/
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
error = add_pages(nid, PHYS_PFN(res->start),
- PHYS_PFN(resource_size(res)), &restrictions);
+ PHYS_PFN(resource_size(res)), &params);
} else {
error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
if (error) {
@@ -311,7 +314,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
error = arch_add_memory(nid, res->start, resource_size(res),
- &restrictions);
+ &params);
}
if (!error) {
@@ -319,7 +322,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
- PHYS_PFN(resource_size(res)), restrictions.altmap);
+ PHYS_PFN(resource_size(res)), params.altmap);
}
mem_hotplug_done();
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d77dbbb80fe..f609e9ec4a25 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1224,7 +1224,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
return a->vm_end == b->vm_start &&
mpol_equal(vma_policy(a), vma_policy(b)) &&
a->vm_file == b->vm_file &&
- !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
+ !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
@@ -2123,6 +2123,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.low_limit = mm->mmap_base;
info.high_limit = mmap_end;
info.align_mask = 0;
+ info.align_offset = 0;
return vm_unmapped_area(&info);
}
#endif
@@ -2164,6 +2165,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
info.align_mask = 0;
+ info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1d823b050329..494192ca954b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -419,7 +419,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
*/
if (arch_has_pfn_modify_check() &&
(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
- (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+ (newflags & VM_ACCESS_FLAGS) == 0) {
pgprot_t new_pgprot = vm_get_page_prot(newflags);
error = walk_page_range(current->mm, start, end,
@@ -598,7 +598,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
newflags |= (vma->vm_flags & ~mask_off_old_flags);
/* newflags >> 4 shift VM_MAY% in place of VM_% */
- if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
+ if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
error = -EACCES;
goto out;
}
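Both the mmap.c and mprotect.c hunks above replace the open-coded VM_READ | VM_WRITE | VM_EXEC mask with VM_ACCESS_FLAGS; the definition assumed here is the straightforward one:

	/* assumed definition, include/linux/mm.h */
	#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)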
diff --git a/mm/mremap.c b/mm/mremap.c
index a7e282ead438..6aa6ea605068 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -413,9 +413,20 @@ static unsigned long move_vma(struct vm_area_struct *vma,
/* Always put back VM_ACCOUNT since we won't unmap */
vma->vm_flags |= VM_ACCOUNT;
- vm_acct_memory(vma_pages(new_vma));
+ vm_acct_memory(new_len >> PAGE_SHIFT);
}
+ /*
+ * VMAs can actually be merged back together in copy_vma
+ * calling merge_vma. This can happen with anonymous vmas
+ * which have not yet been faulted, so if we were to consider
+ * this VMA split we'll end up adding VM_ACCOUNT on the
+ * next VMA, which is completely unrelated if this VMA
+ * was re-merged.
+ */
+ if (split && new_vma == vma)
+ split = 0;
+
/* We always clear VM_LOCKED[ONFAULT] on the old vma */
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
@@ -783,7 +794,7 @@ out:
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
userfaultfd_unmap_complete(mm, &uf_unmap_early);
- mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+ mremap_userfaultfd_complete(&uf, addr, ret, old_len);
userfaultfd_unmap_complete(mm, &uf_unmap);
return ret;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 114c56c3685d..13cc653122b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -103,8 +103,8 @@ struct pcpu_drain {
struct zone *zone;
struct work_struct work;
};
-DEFINE_MUTEX(pcpu_drain_mutex);
-DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
+static DEFINE_MUTEX(pcpu_drain_mutex);
+static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
volatile unsigned long latent_entropy __latent_entropy;
@@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone)
if (!__pageblock_pfn_to_page(block_start_pfn,
block_end_pfn, zone))
return;
+ cond_resched();
}
/* We confirm that there is no hole */
@@ -2400,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone)
if (!watermark_boost_factor)
return;
+ /*
+ * Don't bother in zones that are unlikely to produce results.
+ * On small machines, including kdump capture kernels running
+ * in a small area, boosting the watermark can cause an out of
+ * memory situation immediately.
+ */
+ if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
+ return;
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
watermark_boost_factor, 10000);
@@ -3224,6 +3233,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
* __putback_isolated_page - Return a now-isolated page back where we got it
* @page: Page that was isolated
* @order: Order of the isolated page
+ * @mt: The page's pageblock's migratetype
*
* This function is meant to return a page pulled from the free lists via
* __isolate_free_page back to the free lists they were pulled from.
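For scale on the new boost_watermark() guard: with 4 KiB pages and a typical 2 MiB pageblock (order 9), pageblock_nr_pages is 512, so the check skips boosting in any zone managing fewer than 4 * 512 pages = 8 MiB, which covers the small kdump capture kernels mentioned in the comment.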
diff --git a/mm/percpu.c b/mm/percpu.c
index d7e3bc649f4e..7da7d7737dab 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
@@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
- /* whitelisted flags that can be passed to the backing allocators */
- gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
- bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
- bool do_warn = !(gfp & __GFP_NOWARN);
+ gfp_t pcpu_gfp;
+ bool is_atomic;
+ bool do_warn;
static int warn_limit = 10;
struct pcpu_chunk *chunk, *next;
const char *err;
@@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
void __percpu *ptr;
size_t bits, bit_align;
+ gfp = current_gfp_context(gfp);
+ /* whitelisted flags that can be passed to the backing allocators */
+ pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
+ do_warn = !(gfp & __GFP_NOWARN);
+
/*
* There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
* therefore alignment must be a minimum of that many bytes.
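The point of filtering gfp through current_gfp_context() is that percpu allocations issued inside a scoped NOIO/NOFS section get their mask narrowed before the atomic/whitelist decisions are made. A hedged sketch of the caller-side pattern this interacts with (struct and function names are illustrative):

	struct example_stats { u64 hits; };	/* illustrative only */

	static struct example_stats __percpu *example_alloc_stats(void)
	{
		struct example_stats __percpu *stats;
		unsigned int noio_flags = memalloc_noio_save();

		/* current_gfp_context() strips __GFP_IO/__GFP_FS inside
		 * pcpu_alloc() here, so this request is treated as an
		 * atomic (non-blocking) percpu allocation. */
		stats = alloc_percpu_gfp(struct example_stats, GFP_KERNEL);

		memalloc_noio_restore(noio_flags);
		return stats;
	}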
diff --git a/mm/shmem.c b/mm/shmem.c
index d722eb830317..bd8840082c94 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -952,7 +952,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
VM_BUG_ON_PAGE(PageWriteback(page), page);
if (shmem_punch_compound(page, start, end))
truncate_inode_page(mapping, page);
- else {
+ else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Wipe the page and don't get stuck */
clear_highpage(page);
flush_dcache_page(page);
@@ -2179,7 +2179,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
struct shmem_inode_info *info = SHMEM_I(inode);
int retval = -ENOMEM;
- spin_lock_irq(&info->lock);
+ /*
+ * What serializes the accesses to info->flags?
+ * ipc_lock_object() when called from shmctl_do_lock(),
+ * no serialization needed when called from shm_destroy().
+ */
if (lock && !(info->flags & VM_LOCKED)) {
if (!user_shm_lock(inode->i_size, user))
goto out_nomem;
@@ -2194,7 +2198,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
retval = 0;
out_nomem:
- spin_unlock_irq(&info->lock);
return retval;
}
@@ -2399,11 +2402,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
lru_cache_add_anon(page);
- spin_lock(&info->lock);
+ spin_lock_irq(&info->lock);
info->alloced++;
inode->i_blocks += BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
- spin_unlock(&info->lock);
+ spin_unlock_irq(&info->lock);
inc_mm_counter(dst_mm, mm_counter_file(page));
page_add_file_rmap(page, false);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 93ec4a574d8d..23c7500eea7d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -731,7 +731,7 @@ static void kmemcg_rcufn(struct rcu_head *head)
/*
* We need to grab blocking locks. Bounce to ->work. The
* work item shares the space with the RCU head and can't be
- * initialized eariler.
+ * initialized earlier.
*/
INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
diff --git a/mm/slub.c b/mm/slub.c
index 332d4b459a90..b762450fc9f0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr,
metadata_access_disable();
}
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+ return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+ if (freeptr_outside_object(s))
+ return s->inuse + sizeof(void *);
+ else
+ return s->inuse;
+}
+
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
struct track *p;
- if (s->offset)
- p = object + s->offset + sizeof(void *);
- else
- p = object + s->inuse;
+ p = object + get_info_end(s);
return p + alloc;
}
@@ -686,10 +703,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
print_section(KERN_ERR, "Redzone ", p + s->object_size,
s->inuse - s->object_size);
- if (s->offset)
- off = s->offset + sizeof(void *);
- else
- off = s->inuse;
+ off = get_info_end(s);
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);
@@ -782,7 +796,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
* object address
* Bytes of the object to be managed.
* If the freepointer may overlay the object then the free
- * pointer is the first word of the object.
+ * pointer is at the middle of the object.
*
* Poisoning uses 0x6b (POISON_FREE) and the last byte is
* 0xa5 (POISON_END)
@@ -816,11 +830,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
- unsigned long off = s->inuse; /* The end of info */
-
- if (s->offset)
- /* Freepointer is placed after the object. */
- off += sizeof(void *);
+ unsigned long off = get_info_end(s); /* The end of info */
if (s->flags & SLAB_STORE_USER)
/* We also have user information there */
@@ -907,7 +917,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
check_pad_bytes(s, page, p);
}
- if (!s->offset && val == SLUB_RED_ACTIVE)
+ if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
/*
* Object and freepointer overlap. Cannot check
* freepointer while object is allocated.
@@ -3533,6 +3543,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
+ unsigned int freepointer_area;
unsigned int order;
/*
@@ -3541,6 +3552,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
+ /*
+ * This is the area of the object where a freepointer can be
+ * safely written. If redzoning adds more to the inuse size, we
+ * can't use that portion for writing the freepointer, so
+ * s->offset must be limited within this for the general case.
+ */
+ freepointer_area = size;
#ifdef CONFIG_SLUB_DEBUG
/*
@@ -3579,16 +3597,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*
* This is the case if we do RCU, have a constructor or
* destructor or are poisoning the objects.
+ *
+ * The assumption that s->offset >= s->inuse means free
+ * pointer is outside of the object is used in the
+ * freeptr_outside_object() function. If that is no
+ * longer true, the function needs to be modified.
*/
s->offset = size;
size += sizeof(void *);
- } else if (size > sizeof(void *)) {
+ } else if (freepointer_area > sizeof(void *)) {
/*
* Store freelist pointer near middle of object to keep
* it away from the edges of the object to avoid small
* sized over/underflows from neighboring allocations.
*/
- s->offset = ALIGN(size / 2, sizeof(void *));
+ s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
}
#ifdef CONFIG_SLUB_DEBUG
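As a concrete instance of the new placement: on a 64-bit kernel, a 256-byte cache with no constructor, RCU, or poisoning gets freepointer_area = 256 and s->offset = ALIGN(256 / 2, 8) = 128, so the freelist pointer occupies bytes 128-135 of a free object instead of its first word; the ctor/RCU/poison case keeps s->offset at the end of the object, which is exactly what freeptr_outside_object() detects.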
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 399f219544f7..9a8227afa073 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -34,6 +34,7 @@
#include <linux/llist.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
+#include <linux/overflow.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
@@ -3054,6 +3055,7 @@ finished:
* @vma: vma to cover
* @uaddr: target user address to start at
* @kaddr: virtual address of vmalloc kernel memory
+ * @pgoff: offset from @kaddr to start at
* @size: size of map area
*
* Returns: 0 for success, -Exxx on failure
@@ -3066,9 +3068,15 @@ finished:
* Similar to remap_pfn_range() (see mm/memory.c)
*/
int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
- void *kaddr, unsigned long size)
+ void *kaddr, unsigned long pgoff,
+ unsigned long size)
{
struct vm_struct *area;
+ unsigned long off;
+ unsigned long end_index;
+
+ if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
+ return -EINVAL;
size = PAGE_ALIGN(size);
@@ -3082,8 +3090,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
return -EINVAL;
- if (kaddr + size > area->addr + get_vm_area_size(area))
+ if (check_add_overflow(size, off, &end_index) ||
+ end_index > get_vm_area_size(area))
return -EINVAL;
+ kaddr += off;
do {
struct page *page = vmalloc_to_page(kaddr);
@@ -3122,7 +3132,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff)
{
return remap_vmalloc_range_partial(vma, vma->vm_start,
- addr + (pgoff << PAGE_SHIFT),
+ addr, pgoff,
vma->vm_end - vma->vm_start);
}
EXPORT_SYMBOL(remap_vmalloc_range);
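With the offset handling and overflow checks moved inside remap_vmalloc_range_partial(), a driver's mmap handler keeps passing the raw page offset. A minimal illustrative handler (the buffer source is an assumption):

	/* Illustrative only: assumes file->private_data holds a
	 * vmalloc_user() buffer set up elsewhere by the driver. */
	static int example_mmap(struct file *file, struct vm_area_struct *vma)
	{
		void *vbuf = file->private_data;

		return remap_vmalloc_range(vma, vbuf, vma->vm_pgoff);
	}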
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b06868fc4926..a37c87b5aee2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1625,7 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
* @dst: The temp list to put pages on to.
* @nr_scanned: The number of pages that were scanned.
* @sc: The scan_control struct for this reclaim session
- * @mode: One of the LRU isolation modes
* @lru: LRU list id for isolating
*
* returns how many pages were moved onto *@dst.
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 42f31c4b53ad..8c3bb5e508b8 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -318,16 +318,16 @@ static inline void free_handle(unsigned long handle)
slots = handle_to_slots(handle);
write_lock(&slots->lock);
*(unsigned long *)handle = 0;
- write_unlock(&slots->lock);
- if (zhdr->slots == slots)
+ if (zhdr->slots == slots) {
+ write_unlock(&slots->lock);
return; /* simple case, nothing else to do */
+ }
/* we are freeing a foreign handle if we are here */
zhdr->foreign_handles--;
is_free = true;
- read_lock(&slots->lock);
if (!test_bit(HANDLES_ORPHANED, &slots->pool)) {
- read_unlock(&slots->lock);
+ write_unlock(&slots->lock);
return;
}
for (i = 0; i <= BUDDY_MASK; i++) {
@@ -336,7 +336,7 @@ static inline void free_handle(unsigned long handle)
break;
}
}
- read_unlock(&slots->lock);
+ write_unlock(&slots->lock);
if (is_free) {
struct z3fold_pool *pool = slots_to_pool(slots);
@@ -422,6 +422,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
zhdr->start_middle = 0;
zhdr->cpu = -1;
zhdr->foreign_handles = 0;
+ zhdr->mapped_count = 0;
zhdr->slots = slots;
zhdr->pool = pool;
INIT_LIST_HEAD(&zhdr->buddy);