summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c286
1 files changed, 188 insertions, 98 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 616a2c956b4b..7633c503a116 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,7 +48,7 @@
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
-#include <linux/page_cgroup.h>
+#include <linux/page_ext.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
#include <linux/compaction.h>
@@ -56,9 +56,10 @@
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
-#include <linux/page-debug-flags.h>
+#include <linux/page_ext.h>
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
+#include <linux/page_owner.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -110,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
/*
* When calculating the number of globally allowed dirty pages, there
* is a certain number of per-zone reserves that should not be
@@ -425,6 +427,42 @@ static inline void prep_zero_page(struct page *page, unsigned int order,
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
+bool _debug_pagealloc_enabled __read_mostly;
+bool _debug_guardpage_enabled __read_mostly;
+
+static int __init early_debug_pagealloc(char *buf)
+{
+ if (!buf)
+ return -EINVAL;
+
+ if (strcmp(buf, "on") == 0)
+ _debug_pagealloc_enabled = true;
+
+ return 0;
+}
+early_param("debug_pagealloc", early_debug_pagealloc);
+
+static bool need_debug_guardpage(void)
+{
+ /* If we don't use debug_pagealloc, we don't need guard page */
+ if (!debug_pagealloc_enabled())
+ return false;
+
+ return true;
+}
+
+static void init_debug_guardpage(void)
+{
+ if (!debug_pagealloc_enabled())
+ return;
+
+ _debug_guardpage_enabled = true;
+}
+
+struct page_ext_operations debug_guardpage_ops = {
+ .need = need_debug_guardpage,
+ .init = init_debug_guardpage,
+};
static int __init debug_guardpage_minorder_setup(char *buf)
{
@@ -440,18 +478,44 @@ static int __init debug_guardpage_minorder_setup(char *buf)
}
__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
-static inline void set_page_guard_flag(struct page *page)
+static inline void set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
{
- __set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+ struct page_ext *page_ext;
+
+ if (!debug_guardpage_enabled())
+ return;
+
+ page_ext = lookup_page_ext(page);
+ __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+
+ INIT_LIST_HEAD(&page->lru);
+ set_page_private(page, order);
+ /* Guard pages are not available for any usage */
+ __mod_zone_freepage_state(zone, -(1 << order), migratetype);
}
-static inline void clear_page_guard_flag(struct page *page)
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
{
- __clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+ struct page_ext *page_ext;
+
+ if (!debug_guardpage_enabled())
+ return;
+
+ page_ext = lookup_page_ext(page);
+ __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+
+ set_page_private(page, 0);
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, (1 << order), migratetype);
}
#else
-static inline void set_page_guard_flag(struct page *page) { }
-static inline void clear_page_guard_flag(struct page *page) { }
+struct page_ext_operations debug_guardpage_ops = { NULL, };
+static inline void set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) {}
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) {}
#endif
static inline void set_page_order(struct page *page, unsigned int order)
@@ -582,12 +646,7 @@ static inline void __free_one_page(struct page *page,
* merge with it and move up one order.
*/
if (page_is_guard(buddy)) {
- clear_page_guard_flag(buddy);
- set_page_private(buddy, 0);
- if (!is_migrate_isolate(migratetype)) {
- __mod_zone_freepage_state(zone, 1 << order,
- migratetype);
- }
+ clear_page_guard(zone, buddy, order, migratetype);
} else {
list_del(&buddy->lru);
zone->free_area[order].nr_free--;
@@ -641,8 +700,10 @@ static inline int free_pages_check(struct page *page)
bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
}
- if (unlikely(mem_cgroup_bad_page_check(page)))
- bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+ if (unlikely(page->mem_cgroup))
+ bad_reason = "page still charged to cgroup";
+#endif
if (unlikely(bad_reason)) {
bad_page(page, bad_reason, bad_flags);
return 1;
@@ -741,6 +802,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
int i;
int bad = 0;
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page);
+
trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
@@ -751,6 +815,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
if (bad)
return false;
+ reset_page_owner(page, order);
+
if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),
PAGE_SIZE << order);
@@ -857,23 +923,18 @@ static inline void expand(struct zone *zone, struct page *page,
size >>= 1;
VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (high < debug_guardpage_minorder()) {
+ if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
+ debug_guardpage_enabled() &&
+ high < debug_guardpage_minorder()) {
/*
* Mark as guard pages (or page), that will allow to
* merge back to allocator when buddy will be freed.
* Corresponding page table entries will not be touched,
* pages will stay not present in virtual address space
*/
- INIT_LIST_HEAD(&page[size].lru);
- set_page_guard_flag(&page[size]);
- set_page_private(&page[size], high);
- /* Guard pages are not available for any usage */
- __mod_zone_freepage_state(zone, -(1 << high),
- migratetype);
+ set_page_guard(zone, &page[size], high, migratetype);
continue;
}
-#endif
list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
@@ -898,8 +959,10 @@ static inline int check_new_page(struct page *page)
bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
}
- if (unlikely(mem_cgroup_bad_page_check(page)))
- bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+ if (unlikely(page->mem_cgroup))
+ bad_reason = "page still charged to cgroup";
+#endif
if (unlikely(bad_reason)) {
bad_page(page, bad_reason, bad_flags);
return 1;
@@ -929,6 +992,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
+ set_page_owner(page, order, gfp_flags);
+
return 0;
}
@@ -1267,55 +1332,75 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
#endif
/*
- * Drain pages of the indicated processor.
+ * Drain pcplists of the indicated processor and zone.
*
* The processor must either be the current processor and the
* thread pinned to the current processor or a processor that
* is not online.
*/
-static void drain_pages(unsigned int cpu)
+static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
unsigned long flags;
- struct zone *zone;
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
- for_each_populated_zone(zone) {
- struct per_cpu_pageset *pset;
- struct per_cpu_pages *pcp;
+ local_irq_save(flags);
+ pset = per_cpu_ptr(zone->pageset, cpu);
- local_irq_save(flags);
- pset = per_cpu_ptr(zone->pageset, cpu);
+ pcp = &pset->pcp;
+ if (pcp->count) {
+ free_pcppages_bulk(zone, pcp->count, pcp);
+ pcp->count = 0;
+ }
+ local_irq_restore(flags);
+}
- pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
- pcp->count = 0;
- }
- local_irq_restore(flags);
+/*
+ * Drain pcplists of all zones on the indicated processor.
+ *
+ * The processor must either be the current processor and the
+ * thread pinned to the current processor or a processor that
+ * is not online.
+ */
+static void drain_pages(unsigned int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ drain_pages_zone(cpu, zone);
}
}
/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ *
+ * The CPU has to be pinned. When zone parameter is non-NULL, spill just
+ * the single zone's pages.
*/
-void drain_local_pages(void *arg)
+void drain_local_pages(struct zone *zone)
{
- drain_pages(smp_processor_id());
+ int cpu = smp_processor_id();
+
+ if (zone)
+ drain_pages_zone(cpu, zone);
+ else
+ drain_pages(cpu);
}
/*
* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
*
+ * When zone parameter is non-NULL, spill just the single zone's pages.
+ *
* Note that this code is protected against sending an IPI to an offline
* CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
* on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
* nothing keeps CPUs from showing up after we populated the cpumask and
* before the call to on_each_cpu_mask().
*/
-void drain_all_pages(void)
+void drain_all_pages(struct zone *zone)
{
int cpu;
- struct per_cpu_pageset *pcp;
- struct zone *zone;
/*
* Allocate in the BSS so we wont require allocation in
@@ -1330,20 +1415,31 @@ void drain_all_pages(void)
* disables preemption as part of its processing
*/
for_each_online_cpu(cpu) {
+ struct per_cpu_pageset *pcp;
+ struct zone *z;
bool has_pcps = false;
- for_each_populated_zone(zone) {
+
+ if (zone) {
pcp = per_cpu_ptr(zone->pageset, cpu);
- if (pcp->pcp.count) {
+ if (pcp->pcp.count)
has_pcps = true;
- break;
+ } else {
+ for_each_populated_zone(z) {
+ pcp = per_cpu_ptr(z->pageset, cpu);
+ if (pcp->pcp.count) {
+ has_pcps = true;
+ break;
+ }
}
}
+
if (has_pcps)
cpumask_set_cpu(cpu, &cpus_with_pcps);
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
- on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
+ on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
+ zone, 1);
}
#ifdef CONFIG_HIBERNATION
@@ -1470,8 +1566,11 @@ void split_page(struct page *page, unsigned int order)
split_page(virt_to_page(page[0].shadow), order);
#endif
- for (i = 1; i < (1 << order); i++)
+ set_page_owner(page, 0, 0);
+ for (i = 1; i < (1 << order); i++) {
set_page_refcounted(page + i);
+ set_page_owner(page + i, 0, 0);
+ }
}
EXPORT_SYMBOL_GPL(split_page);
@@ -1511,6 +1610,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
}
}
+ set_page_owner(page, order, 0);
return 1UL << order;
}
@@ -1705,7 +1805,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, int alloc_flags,
long free_pages)
{
- /* free_pages my go negative - that's OK */
+ /* free_pages may go negative - that's OK */
long min = mark;
int o;
long free_cma = 0;
@@ -1953,7 +2053,7 @@ zonelist_scan:
/*
* Scan zonelist, looking for a zone with enough free.
- * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
+ * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
*/
for_each_zone_zonelist_nodemask(zone, z, zonelist,
high_zoneidx, nodemask) {
@@ -1964,7 +2064,7 @@ zonelist_scan:
continue;
if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed_softwall(zone, gfp_mask))
+ !cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
@@ -2296,7 +2396,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
int classzone_idx, int migratetype, enum migrate_mode mode,
int *contended_compaction, bool *deferred_compaction)
{
- struct zone *last_compact_zone = NULL;
unsigned long compact_result;
struct page *page;
@@ -2307,7 +2406,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
compact_result = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, mode,
contended_compaction,
- &last_compact_zone);
+ alloc_flags, classzone_idx);
current->flags &= ~PF_MEMALLOC;
switch (compact_result) {
@@ -2326,10 +2425,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
*/
count_vm_event(COMPACTSTALL);
- /* Page migration frees to the PCP lists but we want merging */
- drain_pages(get_cpu());
- put_cpu();
-
page = get_page_from_freelist(gfp_mask, nodemask,
order, zonelist, high_zoneidx,
alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2345,14 +2440,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
}
/*
- * last_compact_zone is where try_to_compact_pages thought allocation
- * should succeed, so it did not defer compaction. But here we know
- * that it didn't succeed, so we do the defer.
- */
- if (last_compact_zone && mode != MIGRATE_ASYNC)
- defer_compaction(last_compact_zone, order);
-
- /*
* It's bad if compaction run occurs and fails. The most likely reason
* is that pages exist, but not enough to satisfy watermarks.
*/
@@ -2433,7 +2520,7 @@ retry:
* pages are pinned on the per-cpu lists. Drain them and try again
*/
if (!page && !drained) {
- drain_all_pages();
+ drain_all_pages(NULL);
drained = true;
goto retry;
}
@@ -2505,7 +2592,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
alloc_flags |= ALLOC_HARDER;
/*
* Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
- * comment for __cpuset_node_allowed_softwall().
+ * comment for __cpuset_node_allowed().
*/
alloc_flags &= ~ALLOC_CPUSET;
} else if (unlikely(rt_task(current)) && !in_interrupt())
@@ -3893,14 +3980,14 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
else
page_group_by_mobility_disabled = 0;
- printk("Built %i zonelists in %s order, mobility grouping %s. "
+ pr_info("Built %i zonelists in %s order, mobility grouping %s. "
"Total pages: %ld\n",
nr_online_nodes,
zonelist_order_name[current_zonelist_order],
page_group_by_mobility_disabled ? "off" : "on",
vm_total_pages);
#ifdef CONFIG_NUMA
- printk("Policy zone: %s\n", zone_names[policy_zone]);
+ pr_info("Policy zone: %s\n", zone_names[policy_zone]);
#endif
}
@@ -4832,7 +4919,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
#endif
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
- pgdat_page_cgroup_init(pgdat);
+ pgdat_page_ext_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
@@ -4851,16 +4938,18 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
* and per-cpu initialisations
*/
memmap_pages = calc_memmap_size(size, realsize);
- if (freesize >= memmap_pages) {
- freesize -= memmap_pages;
- if (memmap_pages)
- printk(KERN_DEBUG
- " %s zone: %lu pages used for memmap\n",
- zone_names[j], memmap_pages);
- } else
- printk(KERN_WARNING
- " %s zone: %lu pages exceeds freesize %lu\n",
- zone_names[j], memmap_pages, freesize);
+ if (!is_highmem_idx(j)) {
+ if (freesize >= memmap_pages) {
+ freesize -= memmap_pages;
+ if (memmap_pages)
+ printk(KERN_DEBUG
+ " %s zone: %lu pages used for memmap\n",
+ zone_names[j], memmap_pages);
+ } else
+ printk(KERN_WARNING
+ " %s zone: %lu pages exceeds freesize %lu\n",
+ zone_names[j], memmap_pages, freesize);
+ }
/* Account for reserved pages */
if (j == 0 && freesize > dma_reserve) {
@@ -5334,33 +5423,33 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
find_zone_movable_pfns_for_nodes();
/* Print out the zone ranges */
- printk("Zone ranges:\n");
+ pr_info("Zone ranges:\n");
for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
- printk(KERN_CONT " %-8s ", zone_names[i]);
+ pr_info(" %-8s ", zone_names[i]);
if (arch_zone_lowest_possible_pfn[i] ==
arch_zone_highest_possible_pfn[i])
- printk(KERN_CONT "empty\n");
+ pr_cont("empty\n");
else
- printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n",
+ pr_cont("[mem %0#10lx-%0#10lx]\n",
arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
(arch_zone_highest_possible_pfn[i]
<< PAGE_SHIFT) - 1);
}
/* Print out the PFNs ZONE_MOVABLE begins at in each node */
- printk("Movable zone start for each node\n");
+ pr_info("Movable zone start for each node\n");
for (i = 0; i < MAX_NUMNODES; i++) {
if (zone_movable_pfn[i])
- printk(" Node %d: %#010lx\n", i,
+ pr_info(" Node %d: %#010lx\n", i,
zone_movable_pfn[i] << PAGE_SHIFT);
}
/* Print out the early node map */
- printk("Early memory node ranges\n");
+ pr_info("Early memory node ranges\n");
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
- printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
+ pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid,
start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
/* Initialise every node */
@@ -5496,9 +5585,9 @@ void __init mem_init_print_info(const char *str)
#undef adj_init_size
- printk("Memory: %luK/%luK available "
+ pr_info("Memory: %luK/%luK available "
"(%luK kernel code, %luK rwdata, %luK rodata, "
- "%luK init, %luK bss, %luK reserved"
+ "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
#ifdef CONFIG_HIGHMEM
", %luK highmem"
#endif
@@ -5506,7 +5595,8 @@ void __init mem_init_print_info(const char *str)
nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
- (physpages - totalram_pages) << (PAGE_SHIFT-10),
+ (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
+ totalcma_pages << (PAGE_SHIFT-10),
#ifdef CONFIG_HIGHMEM
totalhigh_pages << (PAGE_SHIFT-10),
#endif
@@ -6198,9 +6288,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
if (!PageLRU(page))
found++;
/*
- * If there are RECLAIMABLE pages, we need to check it.
- * But now, memory offline itself doesn't call shrink_slab()
- * and it still to be fixed.
+ * If there are RECLAIMABLE pages, we need to check
+ * it. But now, memory offline itself doesn't call
+ * shrink_node_slabs() and it still to be fixed.
*/
/*
* If the page is not RAM, page_count()should be 0.
@@ -6385,7 +6475,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
*/
lru_add_drain_all();
- drain_all_pages();
+ drain_all_pages(cc.zone);
order = 0;
outer_start = start;