summaryrefslogtreecommitdiffstats
path: root/mm/compaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--mm/compaction.c262
1 files changed, 191 insertions, 71 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 8fa254043801..1427366ad673 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -42,6 +42,11 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
#define CREATE_TRACE_POINTS
#include <trace/events/compaction.h>
+#define block_start_pfn(pfn, order) round_down(pfn, 1UL << (order))
+#define block_end_pfn(pfn, order) ALIGN((pfn) + 1, 1UL << (order))
+#define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order)
+#define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order)
+
static unsigned long release_freepages(struct list_head *freelist)
{
struct page *page, *next;
@@ -161,7 +166,7 @@ static void reset_cached_positions(struct zone *zone)
zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
zone->compact_cached_free_pfn =
- round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages);
+ pageblock_start_pfn(zone_end_pfn(zone) - 1);
}
/*
@@ -519,10 +524,10 @@ isolate_freepages_range(struct compact_control *cc,
LIST_HEAD(freelist);
pfn = start_pfn;
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(pfn);
if (block_start_pfn < cc->zone->zone_start_pfn)
block_start_pfn = cc->zone->zone_start_pfn;
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(pfn);
for (; pfn < end_pfn; pfn += isolated,
block_start_pfn = block_end_pfn,
@@ -538,8 +543,8 @@ isolate_freepages_range(struct compact_control *cc,
* scanning range to right one.
*/
if (pfn >= block_end_pfn) {
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_start_pfn = pageblock_start_pfn(pfn);
+ block_end_pfn = pageblock_end_pfn(pfn);
block_end_pfn = min(block_end_pfn, end_pfn);
}
@@ -633,12 +638,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
{
struct zone *zone = cc->zone;
unsigned long nr_scanned = 0, nr_isolated = 0;
- struct list_head *migratelist = &cc->migratepages;
struct lruvec *lruvec;
unsigned long flags = 0;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
unsigned long start_pfn = low_pfn;
+ bool skip_on_failure = false;
+ unsigned long next_skip_pfn = 0;
/*
* Ensure that there are not too many pages isolated from the LRU
@@ -659,10 +665,37 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (compact_should_abort(cc))
return 0;
+ if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
+ skip_on_failure = true;
+ next_skip_pfn = block_end_pfn(low_pfn, cc->order);
+ }
+
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
bool is_lru;
+ if (skip_on_failure && low_pfn >= next_skip_pfn) {
+ /*
+ * We have isolated all migration candidates in the
+ * previous order-aligned block, and did not skip it due
+ * to failure. We should migrate the pages now and
+ * hopefully succeed compaction.
+ */
+ if (nr_isolated)
+ break;
+
+ /*
+ * We failed to isolate in the previous order-aligned
+ * block. Set the new boundary to the end of the
+ * current block. Note we can't simply increase
+ * next_skip_pfn by 1 << order, as low_pfn might have
+ * been incremented by a higher number due to skipping
+ * a compound or a high-order buddy page in the
+ * previous loop iteration.
+ */
+ next_skip_pfn = block_end_pfn(low_pfn, cc->order);
+ }
+
/*
* Periodically drop the lock (if held) regardless of its
* contention, to give chance to IRQs. Abort async compaction
@@ -674,7 +707,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
break;
if (!pfn_valid_within(low_pfn))
- continue;
+ goto isolate_fail;
nr_scanned++;
page = pfn_to_page(low_pfn);
@@ -729,11 +762,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (likely(comp_order < MAX_ORDER))
low_pfn += (1UL << comp_order) - 1;
- continue;
+ goto isolate_fail;
}
if (!is_lru)
- continue;
+ goto isolate_fail;
/*
* Migration will fail if an anonymous page is pinned in memory,
@@ -742,7 +775,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
*/
if (!page_mapping(page) &&
page_count(page) > page_mapcount(page))
- continue;
+ goto isolate_fail;
/* If we already hold the lock, we can skip some rechecking */
if (!locked) {
@@ -753,7 +786,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* Recheck PageLRU and PageCompound under lock */
if (!PageLRU(page))
- continue;
+ goto isolate_fail;
/*
* Page become compound since the non-locked check,
@@ -762,7 +795,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
*/
if (unlikely(PageCompound(page))) {
low_pfn += (1UL << compound_order(page)) - 1;
- continue;
+ goto isolate_fail;
}
}
@@ -770,7 +803,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
- continue;
+ goto isolate_fail;
VM_BUG_ON_PAGE(PageCompound(page), page);
@@ -778,15 +811,55 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
del_page_from_lru_list(page, lruvec, page_lru(page));
isolate_success:
- list_add(&page->lru, migratelist);
+ list_add(&page->lru, &cc->migratepages);
cc->nr_migratepages++;
nr_isolated++;
+ /*
+ * Record where we could have freed pages by migration and not
+ * yet flushed them to buddy allocator.
+ * - this is the lowest page that was isolated and likely be
+ * then freed by migration.
+ */
+ if (!cc->last_migrated_pfn)
+ cc->last_migrated_pfn = low_pfn;
+
/* Avoid isolating too much */
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
++low_pfn;
break;
}
+
+ continue;
+isolate_fail:
+ if (!skip_on_failure)
+ continue;
+
+ /*
+ * We have isolated some pages, but then failed. Release them
+ * instead of migrating, as we cannot form the cc->order buddy
+ * page anyway.
+ */
+ if (nr_isolated) {
+ if (locked) {
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+ locked = false;
+ }
+ acct_isolated(zone, cc);
+ putback_movable_pages(&cc->migratepages);
+ cc->nr_migratepages = 0;
+ cc->last_migrated_pfn = 0;
+ nr_isolated = 0;
+ }
+
+ if (low_pfn < next_skip_pfn) {
+ low_pfn = next_skip_pfn - 1;
+ /*
+ * The check near the loop beginning would have updated
+ * next_skip_pfn too, but this is a bit simpler.
+ */
+ next_skip_pfn += 1UL << cc->order;
+ }
}
/*
@@ -834,10 +907,10 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
/* Scan block by block. First and last block may be incomplete */
pfn = start_pfn;
- block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(pfn);
if (block_start_pfn < cc->zone->zone_start_pfn)
block_start_pfn = cc->zone->zone_start_pfn;
- block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(pfn);
for (; pfn < end_pfn; pfn = block_end_pfn,
block_start_pfn = block_end_pfn,
@@ -924,10 +997,10 @@ static void isolate_freepages(struct compact_control *cc)
* is using.
*/
isolate_start_pfn = cc->free_pfn;
- block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
+ block_start_pfn = pageblock_start_pfn(cc->free_pfn);
block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
zone_end_pfn(zone));
- low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
+ low_pfn = pageblock_end_pfn(cc->migrate_pfn);
/*
* Isolate free pages until enough are available to migrate the
@@ -1070,7 +1143,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
unsigned long block_start_pfn;
unsigned long block_end_pfn;
unsigned long low_pfn;
- unsigned long isolate_start_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
@@ -1081,12 +1153,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
* initialized by compact_zone()
*/
low_pfn = cc->migrate_pfn;
- block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1);
+ block_start_pfn = pageblock_start_pfn(low_pfn);
if (block_start_pfn < zone->zone_start_pfn)
block_start_pfn = zone->zone_start_pfn;
/* Only scan within a pageblock boundary */
- block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(low_pfn);
/*
* Iterate over whole pageblocks until we find the first suitable.
@@ -1125,7 +1197,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
continue;
/* Perform the isolation */
- isolate_start_pfn = low_pfn;
low_pfn = isolate_migratepages_block(cc, low_pfn,
block_end_pfn, isolate_mode);
@@ -1135,15 +1206,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
}
/*
- * Record where we could have freed pages by migration and not
- * yet flushed them to buddy allocator.
- * - this is the lowest page that could have been isolated and
- * then freed by migration.
- */
- if (cc->nr_migratepages && !cc->last_migrated_pfn)
- cc->last_migrated_pfn = isolate_start_pfn;
-
- /*
* Either we isolated something and proceed with migration. Or
* we failed and compact_zone should decide if we should
* continue or not.
@@ -1167,7 +1229,7 @@ static inline bool is_via_compact_memory(int order)
return order == -1;
}
-static int __compact_finished(struct zone *zone, struct compact_control *cc,
+static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc,
const int migratetype)
{
unsigned int order;
@@ -1190,7 +1252,10 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
if (cc->direct_compaction)
zone->compact_blockskip_flush = true;
- return COMPACT_COMPLETE;
+ if (cc->whole_zone)
+ return COMPACT_COMPLETE;
+ else
+ return COMPACT_PARTIAL_SKIPPED;
}
if (is_via_compact_memory(cc->order))
@@ -1230,8 +1295,9 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
return COMPACT_NO_SUITABLE_PAGE;
}
-static int compact_finished(struct zone *zone, struct compact_control *cc,
- const int migratetype)
+static enum compact_result compact_finished(struct zone *zone,
+ struct compact_control *cc,
+ const int migratetype)
{
int ret;
@@ -1250,8 +1316,10 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
* COMPACT_PARTIAL - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
-static unsigned long __compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int classzone_idx)
+static enum compact_result __compaction_suitable(struct zone *zone, int order,
+ unsigned int alloc_flags,
+ int classzone_idx,
+ unsigned long wmark_target)
{
int fragindex;
unsigned long watermark;
@@ -1274,7 +1342,8 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
* allocated and for a short time, the footprint is higher
*/
watermark += (2UL << order);
- if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags))
+ if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
+ alloc_flags, wmark_target))
return COMPACT_SKIPPED;
/*
@@ -1295,12 +1364,14 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
return COMPACT_CONTINUE;
}
-unsigned long compaction_suitable(struct zone *zone, int order,
- int alloc_flags, int classzone_idx)
+enum compact_result compaction_suitable(struct zone *zone, int order,
+ unsigned int alloc_flags,
+ int classzone_idx)
{
- unsigned long ret;
+ enum compact_result ret;
- ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx);
+ ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
+ zone_page_state(zone, NR_FREE_PAGES));
trace_mm_compaction_suitable(zone, order, ret);
if (ret == COMPACT_NOT_SUITABLE_ZONE)
ret = COMPACT_SKIPPED;
@@ -1308,9 +1379,42 @@ unsigned long compaction_suitable(struct zone *zone, int order,
return ret;
}
-static int compact_zone(struct zone *zone, struct compact_control *cc)
+bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
+ int alloc_flags)
{
- int ret;
+ struct zone *zone;
+ struct zoneref *z;
+
+ /*
+ * Make sure at least one zone would pass __compaction_suitable if we continue
+ * retrying the reclaim.
+ */
+ for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
+ ac->nodemask) {
+ unsigned long available;
+ enum compact_result compact_result;
+
+ /*
+ * Do not consider all the reclaimable memory because we do not
+ * want to trash just for a single high order allocation which
+ * is even not guaranteed to appear even if __compaction_suitable
+ * is happy about the watermark check.
+ */
+ available = zone_reclaimable_pages(zone) / order;
+ available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
+ compact_result = __compaction_suitable(zone, order, alloc_flags,
+ ac_classzone_idx(ac), available);
+ if (compact_result != COMPACT_SKIPPED &&
+ compact_result != COMPACT_NOT_SUITABLE_ZONE)
+ return true;
+ }
+
+ return false;
+}
+
+static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
+{
+ enum compact_result ret;
unsigned long start_pfn = zone->zone_start_pfn;
unsigned long end_pfn = zone_end_pfn(zone);
const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
@@ -1318,15 +1422,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
cc->classzone_idx);
- switch (ret) {
- case COMPACT_PARTIAL:
- case COMPACT_SKIPPED:
- /* Compaction is likely to fail */
+ /* Compaction is likely to fail */
+ if (ret == COMPACT_PARTIAL || ret == COMPACT_SKIPPED)
return ret;
- case COMPACT_CONTINUE:
- /* Fall through to compaction */
- ;
- }
+
+ /* huh, compaction_suitable is returning something unexpected */
+ VM_BUG_ON(ret != COMPACT_CONTINUE);
/*
* Clear pageblock skip if there were failures recently and compaction
@@ -1343,7 +1444,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
- cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages);
+ cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
zone->compact_cached_free_pfn = cc->free_pfn;
}
if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
@@ -1351,6 +1452,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
}
+
+ if (cc->migrate_pfn == start_pfn)
+ cc->whole_zone = true;
+
cc->last_migrated_pfn = 0;
trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
@@ -1398,6 +1503,18 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
ret = COMPACT_CONTENDED;
goto out;
}
+ /*
+ * We failed to migrate at least one page in the current
+ * order-aligned block, so skip the rest of it.
+ */
+ if (cc->direct_compaction &&
+ (cc->mode == MIGRATE_ASYNC)) {
+ cc->migrate_pfn = block_end_pfn(
+ cc->migrate_pfn - 1, cc->order);
+ /* Draining pcplists is useless in this case */
+ cc->last_migrated_pfn = 0;
+
+ }
}
check_drain:
@@ -1411,7 +1528,7 @@ check_drain:
if (cc->order > 0 && cc->last_migrated_pfn) {
int cpu;
unsigned long current_block_start =
- cc->migrate_pfn & ~((1UL << cc->order) - 1);
+ block_start_pfn(cc->migrate_pfn, cc->order);
if (cc->last_migrated_pfn < current_block_start) {
cpu = get_cpu();
@@ -1436,7 +1553,7 @@ out:
cc->nr_freepages = 0;
VM_BUG_ON(free_pfn == 0);
/* The cached pfn is always the first in a pageblock */
- free_pfn &= ~(pageblock_nr_pages-1);
+ free_pfn = pageblock_start_pfn(free_pfn);
/*
* Only go back, not forward. The cached pfn might have been
* already reset to zone end in compact_finished()
@@ -1454,11 +1571,11 @@ out:
return ret;
}
-static unsigned long compact_zone_order(struct zone *zone, int order,
+static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum migrate_mode mode, int *contended,
- int alloc_flags, int classzone_idx)
+ unsigned int alloc_flags, int classzone_idx)
{
- unsigned long ret;
+ enum compact_result ret;
struct compact_control cc = {
.nr_freepages = 0,
.nr_migratepages = 0,
@@ -1496,15 +1613,15 @@ int sysctl_extfrag_threshold = 500;
*
* This is the main entry point for direct page compaction.
*/
-unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
- int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended)
+enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
+ unsigned int alloc_flags, const struct alloc_context *ac,
+ enum migrate_mode mode, int *contended)
{
int may_enter_fs = gfp_mask & __GFP_FS;
int may_perform_io = gfp_mask & __GFP_IO;
struct zoneref *z;
struct zone *zone;
- int rc = COMPACT_DEFERRED;
+ enum compact_result rc = COMPACT_SKIPPED;
int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
*contended = COMPACT_CONTENDED_NONE;
@@ -1518,15 +1635,17 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
/* Compact each zone in the list */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
- int status;
+ enum compact_result status;
int zone_contended;
- if (compaction_deferred(zone, order))
+ if (compaction_deferred(zone, order)) {
+ rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
continue;
+ }
status = compact_zone_order(zone, order, gfp_mask, mode,
&zone_contended, alloc_flags,
- ac->classzone_idx);
+ ac_classzone_idx(ac));
rc = max(status, rc);
/*
* It takes at least one zone that wasn't lock contended
@@ -1536,7 +1655,7 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
/* If a normal allocation would succeed, stop compacting */
if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
- ac->classzone_idx, alloc_flags)) {
+ ac_classzone_idx(ac), alloc_flags)) {
/*
* We think the allocation will succeed in this zone,
* but it is not certain, hence the false. The caller
@@ -1558,7 +1677,8 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
goto break_loop;
}
- if (mode != MIGRATE_ASYNC && status == COMPACT_COMPLETE) {
+ if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE ||
+ status == COMPACT_PARTIAL_SKIPPED)) {
/*
* We think that allocation won't succeed in this zone
* so we defer compaction there. If it ends up
@@ -1593,7 +1713,7 @@ break_loop:
* If at least one zone wasn't deferred or skipped, we report if all
* zones that were tried were lock contended.
*/
- if (rc > COMPACT_SKIPPED && all_zones_contended)
+ if (rc > COMPACT_INACTIVE && all_zones_contended)
*contended = COMPACT_CONTENDED_LOCK;
return rc;
@@ -1742,7 +1862,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
struct zone *zone;
enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
- for (zoneid = 0; zoneid < classzone_idx; zoneid++) {
+ for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
zone = &pgdat->node_zones[zoneid];
if (!populated_zone(zone))
@@ -1777,7 +1897,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
cc.classzone_idx);
count_vm_event(KCOMPACTD_WAKE);
- for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) {
+ for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
int status;
zone = &pgdat->node_zones[zoneid];
@@ -1805,7 +1925,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
cc.classzone_idx, 0)) {
success = true;
compaction_defer_reset(zone, cc.order, false);
- } else if (status == COMPACT_COMPLETE) {
+ } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
/*
* We use sync migration mode here, so we defer like
* sync direct compaction does.