summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/highmem.c2
-rw-r--r--mm/huge_memory.c38
-rw-r--r--mm/hugetlb_cgroup.c198
-rw-r--r--mm/kasan/report.c40
-rw-r--r--mm/memcontrol.c37
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mempolicy.c10
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/page-writeback.c10
-rw-r--r--mm/page_alloc.c61
-rw-r--r--mm/percpu.c61
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/slab.c4
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slub.c16
-rw-r--r--mm/sparse.c9
-rw-r--r--mm/vmalloc.c12
17 files changed, 344 insertions, 168 deletions
diff --git a/mm/highmem.c b/mm/highmem.c
index 107b10f9878e..64d8dea47dd1 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,7 +29,7 @@
#include <linux/highmem.h>
#include <linux/kgdb.h>
#include <asm/tlbflush.h>
-
+#include <linux/vmalloc.h>
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 41a0fbddc96b..a88093213674 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -527,13 +527,13 @@ void prep_transhuge_page(struct page *page)
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
}
-static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
+static unsigned long __thp_get_unmapped_area(struct file *filp,
+ unsigned long addr, unsigned long len,
loff_t off, unsigned long flags, unsigned long size)
{
- unsigned long addr;
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
- unsigned long len_pad;
+ unsigned long len_pad, ret;
if (off_end <= off_align || (off_end - off_align) < size)
return 0;
@@ -542,30 +542,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le
if (len_pad < len || (off + len_pad) < off)
return 0;
- addr = current->mm->get_unmapped_area(filp, 0, len_pad,
+ ret = current->mm->get_unmapped_area(filp, addr, len_pad,
off >> PAGE_SHIFT, flags);
- if (IS_ERR_VALUE(addr))
+
+ /*
+ * The failure might be due to length padding. The caller will retry
+ * without the padding.
+ */
+ if (IS_ERR_VALUE(ret))
return 0;
- addr += (off - addr) & (size - 1);
- return addr;
+ /*
+ * Do not try to align to THP boundary if allocation at the address
+ * hint succeeds.
+ */
+ if (ret == addr)
+ return addr;
+
+ ret += (off - ret) & (size - 1);
+ return ret;
}
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
+ unsigned long ret;
loff_t off = (loff_t)pgoff << PAGE_SHIFT;
- if (addr)
- goto out;
if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
goto out;
- addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE);
- if (addr)
- return addr;
-
- out:
+ ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE);
+ if (ret)
+ return ret;
+out:
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 2ac38bdc18a1..e434b05416c6 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -3,6 +3,10 @@
* Copyright IBM Corporation, 2012
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
+ * Cgroup v2
+ * Copyright (C) 2019 Red Hat, Inc.
+ * Author: Giuseppe Scrivano <gscrivan@redhat.com>
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
@@ -19,18 +23,36 @@
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
+enum hugetlb_memory_event {
+ HUGETLB_MAX,
+ HUGETLB_NR_MEMORY_EVENTS,
+};
+
struct hugetlb_cgroup {
struct cgroup_subsys_state css;
+
/*
* the counter to account for hugepages from hugetlb.
*/
struct page_counter hugepage[HUGE_MAX_HSTATE];
+
+ atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
+ atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
+
+ /* Handle for "hugetlb.events" */
+ struct cgroup_file events_file[HUGE_MAX_HSTATE];
+
+ /* Handle for "hugetlb.events.local" */
+ struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val) (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
+#define hugetlb_cgroup_from_counter(counter, idx) \
+ container_of(counter, struct hugetlb_cgroup, hugepage[idx])
+
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
static inline
@@ -178,6 +200,19 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
} while (hugetlb_cgroup_have_usage(h_cg));
}
+static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
+ enum hugetlb_memory_event event)
+{
+ atomic_long_inc(&hugetlb->events_local[idx][event]);
+ cgroup_file_notify(&hugetlb->events_local_file[idx]);
+
+ do {
+ atomic_long_inc(&hugetlb->events[idx][event]);
+ cgroup_file_notify(&hugetlb->events_file[idx]);
+ } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
+ !hugetlb_cgroup_is_root(hugetlb));
+}
+
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
@@ -202,8 +237,12 @@ again:
}
rcu_read_unlock();
- if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter))
+ if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
+ &counter)) {
ret = -ENOMEM;
+ hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
+ HUGETLB_MAX);
+ }
css_put(&h_cg->css);
done:
*ptr = h_cg;
@@ -283,10 +322,45 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
}
}
+static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
+{
+ int idx;
+ u64 val;
+ struct cftype *cft = seq_cft(seq);
+ unsigned long limit;
+ struct page_counter *counter;
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
+
+ idx = MEMFILE_IDX(cft->private);
+ counter = &h_cg->hugepage[idx];
+
+ limit = round_down(PAGE_COUNTER_MAX,
+ 1 << huge_page_order(&hstates[idx]));
+
+ switch (MEMFILE_ATTR(cft->private)) {
+ case RES_USAGE:
+ val = (u64)page_counter_read(counter);
+ seq_printf(seq, "%llu\n", val * PAGE_SIZE);
+ break;
+ case RES_LIMIT:
+ val = (u64)counter->max;
+ if (val == limit)
+ seq_puts(seq, "max\n");
+ else
+ seq_printf(seq, "%llu\n", val * PAGE_SIZE);
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
static DEFINE_MUTEX(hugetlb_limit_mutex);
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
+ char *buf, size_t nbytes, loff_t off,
+ const char *max)
{
int ret, idx;
unsigned long nr_pages;
@@ -296,7 +370,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
return -EINVAL;
buf = strstrip(buf);
- ret = page_counter_memparse(buf, "-1", &nr_pages);
+ ret = page_counter_memparse(buf, max, &nr_pages);
if (ret)
return ret;
@@ -316,6 +390,18 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}
+static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
+}
+
+static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
+}
+
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
@@ -350,7 +436,36 @@ static char *mem_fmt(char *buf, int size, unsigned long hsize)
return buf;
}
-static void __init __hugetlb_cgroup_file_init(int idx)
+static int __hugetlb_events_show(struct seq_file *seq, bool local)
+{
+ int idx;
+ long max;
+ struct cftype *cft = seq_cft(seq);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
+
+ idx = MEMFILE_IDX(cft->private);
+
+ if (local)
+ max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
+ else
+ max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
+
+ seq_printf(seq, "max %lu\n", max);
+
+ return 0;
+}
+
+static int hugetlb_events_show(struct seq_file *seq, void *v)
+{
+ return __hugetlb_events_show(seq, false);
+}
+
+static int hugetlb_events_local_show(struct seq_file *seq, void *v)
+{
+ return __hugetlb_events_show(seq, true);
+}
+
+static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
char buf[32];
struct cftype *cft;
@@ -360,38 +475,93 @@ static void __init __hugetlb_cgroup_file_init(int idx)
mem_fmt(buf, 32, huge_page_size(h));
/* Add the limit file */
- cft = &h->cgroup_files[0];
+ cft = &h->cgroup_files_dfl[0];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
+ cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
+ cft->seq_show = hugetlb_cgroup_read_u64_max;
+ cft->write = hugetlb_cgroup_write_dfl;
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* Add the current usage file */
+ cft = &h->cgroup_files_dfl[1];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
+ cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
+ cft->seq_show = hugetlb_cgroup_read_u64_max;
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* Add the events file */
+ cft = &h->cgroup_files_dfl[2];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
+ cft->private = MEMFILE_PRIVATE(idx, 0);
+ cft->seq_show = hugetlb_events_show;
+ cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]),
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* Add the events.local file */
+ cft = &h->cgroup_files_dfl[3];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
+ cft->private = MEMFILE_PRIVATE(idx, 0);
+ cft->seq_show = hugetlb_events_local_show;
+ cft->file_offset = offsetof(struct hugetlb_cgroup,
+ events_local_file[idx]),
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* NULL terminate the last cft */
+ cft = &h->cgroup_files_dfl[4];
+ memset(cft, 0, sizeof(*cft));
+
+ WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
+ h->cgroup_files_dfl));
+}
+
+static void __init __hugetlb_cgroup_file_legacy_init(int idx)
+{
+ char buf[32];
+ struct cftype *cft;
+ struct hstate *h = &hstates[idx];
+
+ /* format the size */
+ mem_fmt(buf, 32, huge_page_size(h));
+
+ /* Add the limit file */
+ cft = &h->cgroup_files_legacy[0];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
cft->read_u64 = hugetlb_cgroup_read_u64;
- cft->write = hugetlb_cgroup_write;
+ cft->write = hugetlb_cgroup_write_legacy;
/* Add the usage file */
- cft = &h->cgroup_files[1];
+ cft = &h->cgroup_files_legacy[1];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
cft->read_u64 = hugetlb_cgroup_read_u64;
/* Add the MAX usage file */
- cft = &h->cgroup_files[2];
+ cft = &h->cgroup_files_legacy[2];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
cft->write = hugetlb_cgroup_reset;
cft->read_u64 = hugetlb_cgroup_read_u64;
/* Add the failcntfile */
- cft = &h->cgroup_files[3];
+ cft = &h->cgroup_files_legacy[3];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
cft->write = hugetlb_cgroup_reset;
cft->read_u64 = hugetlb_cgroup_read_u64;
/* NULL terminate the last cft */
- cft = &h->cgroup_files[4];
+ cft = &h->cgroup_files_legacy[4];
memset(cft, 0, sizeof(*cft));
WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
- h->cgroup_files));
+ h->cgroup_files_legacy));
+}
+
+static void __init __hugetlb_cgroup_file_init(int idx)
+{
+ __hugetlb_cgroup_file_dfl_init(idx);
+ __hugetlb_cgroup_file_legacy_init(idx);
}
void __init hugetlb_cgroup_file_init(void)
@@ -433,8 +603,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
return;
}
+static struct cftype hugetlb_files[] = {
+ {} /* terminate */
+};
+
struct cgroup_subsys hugetlb_cgrp_subsys = {
.css_alloc = hugetlb_cgroup_css_alloc,
.css_offline = hugetlb_cgroup_css_offline,
.css_free = hugetlb_cgroup_css_free,
+ .dfl_cftypes = hugetlb_files,
+ .legacy_cftypes = hugetlb_files,
};
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 621782100eaa..5ef9f24f566b 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
end_report(&flags);
}
+
+#ifdef CONFIG_KASAN_INLINE
+/*
+ * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
+ * canonical half of the address space) cause out-of-bounds shadow memory reads
+ * before the actual access. For addresses in the low canonical half of the
+ * address space, as well as most non-canonical addresses, that out-of-bounds
+ * shadow memory access lands in the non-canonical part of the address space.
+ * Help the user figure out what the original bogus pointer was.
+ */
+void kasan_non_canonical_hook(unsigned long addr)
+{
+ unsigned long orig_addr;
+ const char *bug_type;
+
+ if (addr < KASAN_SHADOW_OFFSET)
+ return;
+
+ orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
+ /*
+ * For faults near the shadow address for NULL, we can be fairly certain
+ * that this is a KASAN shadow memory access.
+ * For faults that correspond to shadow for low canonical addresses, we
+ * can still be pretty sure - that shadow region is a fairly narrow
+ * chunk of the non-canonical address space.
+ * But faults that look like shadow for non-canonical addresses are a
+ * really large chunk of the address space. In that case, we still
+ * print the decoded address, but make it clear that this is not
+ * necessarily what's actually going on.
+ */
+ if (orig_addr < PAGE_SIZE)
+ bug_type = "null-ptr-deref";
+ else if (orig_addr < TASK_SIZE)
+ bug_type = "probably user-memory-access";
+ else
+ bug_type = "maybe wild-memory-access";
+ pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type,
+ orig_addr, orig_addr + KASAN_SHADOW_MASK);
+}
+#endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5b5f74cfd4d..6c83cf4ed970 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3287,49 +3287,34 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
}
}
-static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
{
- unsigned long stat[MEMCG_NR_STAT];
+ unsigned long stat[MEMCG_NR_STAT] = {0};
struct mem_cgroup *mi;
int node, cpu, i;
- int min_idx, max_idx;
-
- if (slab_only) {
- min_idx = NR_SLAB_RECLAIMABLE;
- max_idx = NR_SLAB_UNRECLAIMABLE;
- } else {
- min_idx = 0;
- max_idx = MEMCG_NR_STAT;
- }
-
- for (i = min_idx; i < max_idx; i++)
- stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu);
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
atomic_long_add(stat[i], &mi->vmstats[i]);
- if (!slab_only)
- max_idx = NR_VM_NODE_STAT_ITEMS;
-
for_each_node(node) {
struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
struct mem_cgroup_per_node *pi;
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
stat[i] += per_cpu(
pn->lruvec_stat_cpu->count[i], cpu);
for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
atomic_long_add(stat[i], &pi->lruvec_stat[i]);
}
}
@@ -3403,13 +3388,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/*
- * Deactivate and reparent kmem_caches. Then flush percpu
- * slab statistics to have precise values at the parent and
- * all ancestor levels. It's required to keep slab stats
- * accurate after the reparenting of kmem_caches.
+ * Deactivate and reparent kmem_caches.
*/
memcg_deactivate_kmem_caches(memcg, parent);
- memcg_flush_percpu_vmstats(memcg, true);
kmemcg_id = memcg->kmemcg_id;
BUG_ON(kmemcg_id < 0);
@@ -4913,7 +4894,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
* Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
- memcg_flush_percpu_vmstats(memcg, false);
+ memcg_flush_percpu_vmstats(memcg);
memcg_flush_percpu_vmevents(memcg);
__mem_cgroup_free(memcg);
}
diff --git a/mm/memory.c b/mm/memory.c
index 45442d9a4f52..1c4be871a237 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2203,7 +2203,7 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
pte_t *page_table, pte_t orig_pte)
{
int same = 1;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
if (sizeof(pte_t) > sizeof(unsigned long)) {
spinlock_t *ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 067cf7d3daf5..b2920ae87a61 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2148,18 +2148,22 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
+ /*
+ * First, try to allocate THP only on local node, but
+ * don't reclaim unnecessarily, just compact.
+ */
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ gfp | __GFP_THISNODE | __GFP_NORETRY, order);
/*
* If hugepage allocations are configured to always
* synchronous compact or the vma has been madvised
* to prefer hugepage backing, retry allowing remote
- * memory as well.
+ * memory with both reclaim and compact as well.
*/
if (!page && (gfp & __GFP_DIRECT_RECLAIM))
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_NORETRY, order);
+ gfp, order);
goto out;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 71e4ffc83bcd..bc788548c4e5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3336,6 +3336,8 @@ static const struct vm_operations_struct special_mapping_vmops = {
.fault = special_mapping_fault,
.mremap = special_mapping_mremap,
.name = special_mapping_name,
+ /* vDSO code relies that VVAR can't be accessed remotely */
+ .access = NULL,
};
static const struct vm_operations_struct legacy_special_mapping_vmops = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50055d2e4ea8..2caf780a42e7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
if (this_bw < tot_bw) {
if (min) {
min *= this_bw;
- do_div(min, tot_bw);
+ min = div64_ul(min, tot_bw);
}
if (max < 100) {
max *= this_bw;
- do_div(max, tot_bw);
+ max = div64_ul(max, tot_bw);
}
}
@@ -766,7 +766,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
struct wb_domain *dom = dtc_dom(dtc);
unsigned long thresh = dtc->thresh;
u64 wb_thresh;
- long numerator, denominator;
+ unsigned long numerator, denominator;
unsigned long wb_min_ratio, wb_max_ratio;
/*
@@ -777,7 +777,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
wb_thresh *= numerator;
- do_div(wb_thresh, denominator);
+ wb_thresh = div64_ul(wb_thresh, denominator);
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
@@ -1102,7 +1102,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
bw = written - min(written, wb->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
- do_div(bw, elapsed);
+ bw = div64_ul(bw, elapsed);
avg = bw;
goto out;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..d047bf7d8fd4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
-DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
-#else
+bool _debug_pagealloc_enabled_early __read_mostly
+ = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
-#endif
EXPORT_SYMBOL(_debug_pagealloc_enabled);
DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static int __init early_debug_pagealloc(char *buf)
{
- bool enable = false;
-
- if (kstrtobool(buf, &enable))
- return -EINVAL;
-
- if (enable)
- static_branch_enable(&_debug_pagealloc_enabled);
-
- return 0;
+ return kstrtobool(buf, &_debug_pagealloc_enabled_early);
}
early_param("debug_pagealloc", early_debug_pagealloc);
-static void init_debug_guardpage(void)
+void init_debug_pagealloc(void)
{
if (!debug_pagealloc_enabled())
return;
+ static_branch_enable(&_debug_pagealloc_enabled);
+
if (!debug_guardpage_minorder())
return;
@@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
*/
arch_free_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 0);
kasan_free_nondeferred_pages(page, order);
@@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page)
static bool bulkfree_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_check(page);
else
return false;
@@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page)
*/
static bool free_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_prepare(page, 0, true);
else
return free_pages_prepare(page, 0, false);
@@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void)
for_each_populated_zone(zone)
set_zone_contiguous(zone);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- init_debug_guardpage();
-#endif
}
#ifdef CONFIG_CMA
@@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void)
*/
static inline bool check_pcp_refill(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page)
}
static inline bool check_new_pcp(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_refcounted(page);
arch_alloc_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
kernel_poison_pages(page, 1 << order, 1);
@@ -4476,8 +4465,11 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
- !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+ /*
+ * Checks for costly allocations with __GFP_NORETRY, which
+ * includes some THP page fault allocations
+ */
+ if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
@@ -4498,23 +4490,6 @@ retry_cpuset:
if (compact_result == COMPACT_SKIPPED ||
compact_result == COMPACT_DEFERRED)
goto nopage;
- }
-
- /*
- * Checks for costly allocations with __GFP_NORETRY, which
- * includes THP page fault allocations
- */
- if (costly_order && (gfp_mask & __GFP_NORETRY)) {
- /*
- * If compaction is deferred for high-order allocations,
- * it is because sync compaction recently failed. If
- * this is the case and the caller requested a THP
- * allocation, we do not want to heavily disrupt the
- * system, so we fail the allocation instead of entering
- * direct reclaim.
- */
- if (compact_result == COMPACT_DEFERRED)
- goto nopage;
/*
* Looks like reclaim/compaction is worth trying, but
diff --git a/mm/percpu.c b/mm/percpu.c
index 7e06a1e58720..e9844086b236 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -270,33 +270,6 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
pcpu_unit_page_offset(cpu, page_idx);
}
-static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
-{
- *rs = find_next_zero_bit(bitmap, end, *rs);
- *re = find_next_bit(bitmap, end, *rs + 1);
-}
-
-static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
-{
- *rs = find_next_bit(bitmap, end, *rs);
- *re = find_next_zero_bit(bitmap, end, *rs + 1);
-}
-
-/*
- * Bitmap region iterators. Iterates over the bitmap between
- * [@start, @end) in @chunk. @rs and @re should be integer variables
- * and will be set to start and end index of the current free region.
- */
-#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))
-
-#define pcpu_for_each_pop_region(bitmap, rs, re, start, end) \
- for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end)); \
- (rs) < (re); \
- (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))
-
/*
* The following are helper functions to help access bitmaps and convert
* between bitmap offsets to address offsets.
@@ -732,9 +705,8 @@ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
}
bits = 0;
- pcpu_for_each_md_free_region(chunk, bit_off, bits) {
+ pcpu_for_each_md_free_region(chunk, bit_off, bits)
pcpu_block_update(chunk_md, bit_off, bit_off + bits);
- }
}
/**
@@ -749,7 +721,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
{
struct pcpu_block_md *block = chunk->md_blocks + index;
unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
- int rs, re, start; /* region start, region end */
+ unsigned int rs, re, start; /* region start, region end */
/* promote scan_hint to contig_hint */
if (block->scan_hint) {
@@ -765,10 +737,9 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
block->right_free = 0;
/* iterate over free areas and update the contig hints */
- pcpu_for_each_unpop_region(alloc_map, rs, re, start,
- PCPU_BITMAP_BLOCK_BITS) {
+ bitmap_for_each_clear_region(alloc_map, rs, re, start,
+ PCPU_BITMAP_BLOCK_BITS)
pcpu_block_update(block, rs, re);
- }
}
/**
@@ -1041,13 +1012,13 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
int *next_off)
{
- int page_start, page_end, rs, re;
+ unsigned int page_start, page_end, rs, re;
page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
rs = page_start;
- pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
+ bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
if (rs >= page_end)
return true;
@@ -1702,13 +1673,13 @@ area_found:
/* populate if not all pages are already there */
if (!is_atomic) {
- int page_start, page_end, rs, re;
+ unsigned int page_start, page_end, rs, re;
page_start = PFN_DOWN(off);
page_end = PFN_UP(off + size);
- pcpu_for_each_unpop_region(chunk->populated, rs, re,
- page_start, page_end) {
+ bitmap_for_each_clear_region(chunk->populated, rs, re,
+ page_start, page_end) {
WARN_ON(chunk->immutable);
ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
@@ -1858,10 +1829,10 @@ static void pcpu_balance_workfn(struct work_struct *work)
spin_unlock_irq(&pcpu_lock);
list_for_each_entry_safe(chunk, next, &to_free, list) {
- int rs, re;
+ unsigned int rs, re;
- pcpu_for_each_pop_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
+ bitmap_for_each_set_region(chunk->populated, rs, re, 0,
+ chunk->nr_pages) {
pcpu_depopulate_chunk(chunk, rs, re);
spin_lock_irq(&pcpu_lock);
pcpu_chunk_depopulated(chunk, rs, re);
@@ -1893,7 +1864,7 @@ retry_pop:
}
for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
- int nr_unpop = 0, rs, re;
+ unsigned int nr_unpop = 0, rs, re;
if (!nr_to_pop)
break;
@@ -1910,9 +1881,9 @@ retry_pop:
continue;
/* @chunk can't go away while pcpu_alloc_mutex is held */
- pcpu_for_each_unpop_region(chunk->populated, rs, re, 0,
- chunk->nr_pages) {
- int nr = min(re - rs, nr_to_pop);
+ bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
+ chunk->nr_pages) {
+ int nr = min_t(int, re - rs, nr_to_pop);
ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
if (!ret) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 165fa6332993..8793e8cc1a48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2107,9 +2107,10 @@ unsigned long shmem_get_unmapped_area(struct file *file,
/*
* Our priority is to support MAP_SHARED mapped hugely;
* and support MAP_PRIVATE mapped hugely too, until it is COWed.
- * But if caller specified an address hint, respect that as before.
+ * But if caller specified an address hint and we allocated area there
+ * successfully, respect that as before.
*/
- if (uaddr)
+ if (uaddr == addr)
return addr;
if (shmem_huge != SHMEM_HUGE_FORCE) {
@@ -2143,7 +2144,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (inflated_len < len)
return addr;
- inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+ inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
if (IS_ERR_VALUE(inflated_addr))
return addr;
if (inflated_addr & ~PAGE_MASK)
diff --git a/mm/slab.c b/mm/slab.c
index f1e1840af533..a89633603b2d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1416,7 +1416,7 @@ static void kmem_rcu_free(struct rcu_head *head)
#if DEBUG
static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
{
- if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
+ if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
(cachep->size % PAGE_SIZE) == 0)
return true;
@@ -2008,7 +2008,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
- if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
+ if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
size >= 256 && cachep->object_size > cache_line_size()) {
if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
size_t tmp_size = ALIGN(size, PAGE_SIZE);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f0ab6d4ceb4c..0d95ddea13b0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -903,7 +903,8 @@ static void flush_memcg_workqueue(struct kmem_cache *s)
* deactivates the memcg kmem_caches through workqueue. Make sure all
* previous workitems on workqueue are processed.
*/
- flush_workqueue(memcg_kmem_cache_wq);
+ if (likely(memcg_kmem_cache_wq))
+ flush_workqueue(memcg_kmem_cache_wq);
/*
* If we're racing with children kmem_cache deactivation, it might
diff --git a/mm/slub.c b/mm/slub.c
index d11389710b12..0ab92ec8c2a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -288,7 +288,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
unsigned long freepointer_addr;
void *p;
- if (!debug_pagealloc_enabled())
+ if (!debug_pagealloc_enabled_static())
return get_freepointer(s, object);
freepointer_addr = (unsigned long)object + s->offset;
@@ -1964,7 +1964,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
return get_any_partial(s, flags, c);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/*
* Calculate the next globally unique transaction for disambiguiation
* during cmpxchg. The transactions start with the cpu number and are then
@@ -2009,7 +2009,7 @@ static inline void note_cmpxchg_failure(const char *n,
pr_info("%s %s: cmpxchg redo ", n, s->name);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
pr_warn("due to cpu change %d -> %d\n",
tid_to_cpu(tid), tid_to_cpu(actual_tid));
@@ -2341,7 +2341,7 @@ static bool has_cpu_slab(int cpu, void *info)
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
}
/*
@@ -2637,7 +2637,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long flags;
local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/*
* We may have been preempted and rescheduled on a different
* cpu before disabling interrupts. Need to reload cpu area
@@ -2691,13 +2691,13 @@ redo:
* as we end up on the original cpu again when doing the cmpxchg.
*
* We should guarantee that tid and kmem_cache are retrieved on
- * the same cpu. It could be different if CONFIG_PREEMPT so we need
+ * the same cpu. It could be different if CONFIG_PREEMPTION so we need
* to check if it is matched or not.
*/
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
unlikely(tid != READ_ONCE(c->tid)));
/*
@@ -2971,7 +2971,7 @@ redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
unlikely(tid != READ_ONCE(c->tid)));
/* Same with comment on barrier() in slab_alloc_node() */
diff --git a/mm/sparse.c b/mm/sparse.c
index b20ab7cdac86..3822ecbd8a1f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -777,7 +777,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
unsigned long section_nr = pfn_to_section_nr(pfn);
- if (!section_is_early) {
+ /*
+ * When removing an early section, the usage map is kept (as the
+ * usage maps of other sections fall into the same page). It
+ * will be re-used when re-adding the section - which is then no
+ * longer an early section. If the usage map is PageReserved, it
+ * was allocated during boot.
+ */
+ if (!PageReserved(virt_to_page(ms->usage))) {
kfree(ms->usage);
ms->usage = NULL;
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e9681dc4aa75..1f46c3b86f9f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -41,6 +41,14 @@
#include "internal.h"
+bool is_vmalloc_addr(const void *x)
+{
+ unsigned long addr = (unsigned long)x;
+
+ return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+EXPORT_SYMBOL(is_vmalloc_addr);
+
struct vfree_deferred {
struct llist_head list;
struct work_struct wq;
@@ -1383,7 +1391,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
{
flush_cache_vunmap(va->va_start, va->va_end);
unmap_vmap_area(va);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
flush_tlb_kernel_range(va->va_start, va->va_end);
free_vmap_area_noflush(va);
@@ -1681,7 +1689,7 @@ static void vb_free(const void *addr, unsigned long size)
vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
flush_tlb_kernel_range((unsigned long)addr,
(unsigned long)addr + size);