diff options
Diffstat (limited to 'mm/compaction.c')
-rw-r--r-- | mm/compaction.c | 262 |
1 files changed, 191 insertions, 71 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 8fa254043801..1427366ad673 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -42,6 +42,11 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #define CREATE_TRACE_POINTS #include <trace/events/compaction.h> +#define block_start_pfn(pfn, order) round_down(pfn, 1UL << (order)) +#define block_end_pfn(pfn, order) ALIGN((pfn) + 1, 1UL << (order)) +#define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order) +#define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order) + static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -161,7 +166,7 @@ static void reset_cached_positions(struct zone *zone) zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; zone->compact_cached_free_pfn = - round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages); + pageblock_start_pfn(zone_end_pfn(zone) - 1); } /* @@ -519,10 +524,10 @@ isolate_freepages_range(struct compact_control *cc, LIST_HEAD(freelist); pfn = start_pfn; - block_start_pfn = pfn & ~(pageblock_nr_pages - 1); + block_start_pfn = pageblock_start_pfn(pfn); if (block_start_pfn < cc->zone->zone_start_pfn) block_start_pfn = cc->zone->zone_start_pfn; - block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = pageblock_end_pfn(pfn); for (; pfn < end_pfn; pfn += isolated, block_start_pfn = block_end_pfn, @@ -538,8 +543,8 @@ isolate_freepages_range(struct compact_control *cc, * scanning range to right one. */ if (pfn >= block_end_pfn) { - block_start_pfn = pfn & ~(pageblock_nr_pages - 1); - block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_start_pfn = pageblock_start_pfn(pfn); + block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); } @@ -633,12 +638,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, { struct zone *zone = cc->zone; unsigned long nr_scanned = 0, nr_isolated = 0; - struct list_head *migratelist = &cc->migratepages; struct lruvec *lruvec; unsigned long flags = 0; bool locked = false; struct page *page = NULL, *valid_page = NULL; unsigned long start_pfn = low_pfn; + bool skip_on_failure = false; + unsigned long next_skip_pfn = 0; /* * Ensure that there are not too many pages isolated from the LRU @@ -659,10 +665,37 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (compact_should_abort(cc)) return 0; + if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) { + skip_on_failure = true; + next_skip_pfn = block_end_pfn(low_pfn, cc->order); + } + /* Time to isolate some pages for migration */ for (; low_pfn < end_pfn; low_pfn++) { bool is_lru; + if (skip_on_failure && low_pfn >= next_skip_pfn) { + /* + * We have isolated all migration candidates in the + * previous order-aligned block, and did not skip it due + * to failure. We should migrate the pages now and + * hopefully succeed compaction. + */ + if (nr_isolated) + break; + + /* + * We failed to isolate in the previous order-aligned + * block. Set the new boundary to the end of the + * current block. Note we can't simply increase + * next_skip_pfn by 1 << order, as low_pfn might have + * been incremented by a higher number due to skipping + * a compound or a high-order buddy page in the + * previous loop iteration. + */ + next_skip_pfn = block_end_pfn(low_pfn, cc->order); + } + /* * Periodically drop the lock (if held) regardless of its * contention, to give chance to IRQs. Abort async compaction @@ -674,7 +707,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, break; if (!pfn_valid_within(low_pfn)) - continue; + goto isolate_fail; nr_scanned++; page = pfn_to_page(low_pfn); @@ -729,11 +762,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (likely(comp_order < MAX_ORDER)) low_pfn += (1UL << comp_order) - 1; - continue; + goto isolate_fail; } if (!is_lru) - continue; + goto isolate_fail; /* * Migration will fail if an anonymous page is pinned in memory, @@ -742,7 +775,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, */ if (!page_mapping(page) && page_count(page) > page_mapcount(page)) - continue; + goto isolate_fail; /* If we already hold the lock, we can skip some rechecking */ if (!locked) { @@ -753,7 +786,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Recheck PageLRU and PageCompound under lock */ if (!PageLRU(page)) - continue; + goto isolate_fail; /* * Page become compound since the non-locked check, @@ -762,7 +795,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, */ if (unlikely(PageCompound(page))) { low_pfn += (1UL << compound_order(page)) - 1; - continue; + goto isolate_fail; } } @@ -770,7 +803,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Try isolate the page */ if (__isolate_lru_page(page, isolate_mode) != 0) - continue; + goto isolate_fail; VM_BUG_ON_PAGE(PageCompound(page), page); @@ -778,15 +811,55 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, del_page_from_lru_list(page, lruvec, page_lru(page)); isolate_success: - list_add(&page->lru, migratelist); + list_add(&page->lru, &cc->migratepages); cc->nr_migratepages++; nr_isolated++; + /* + * Record where we could have freed pages by migration and not + * yet flushed them to buddy allocator. + * - this is the lowest page that was isolated and likely be + * then freed by migration. + */ + if (!cc->last_migrated_pfn) + cc->last_migrated_pfn = low_pfn; + /* Avoid isolating too much */ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { ++low_pfn; break; } + + continue; +isolate_fail: + if (!skip_on_failure) + continue; + + /* + * We have isolated some pages, but then failed. Release them + * instead of migrating, as we cannot form the cc->order buddy + * page anyway. + */ + if (nr_isolated) { + if (locked) { + spin_unlock_irqrestore(&zone->lru_lock, flags); + locked = false; + } + acct_isolated(zone, cc); + putback_movable_pages(&cc->migratepages); + cc->nr_migratepages = 0; + cc->last_migrated_pfn = 0; + nr_isolated = 0; + } + + if (low_pfn < next_skip_pfn) { + low_pfn = next_skip_pfn - 1; + /* + * The check near the loop beginning would have updated + * next_skip_pfn too, but this is a bit simpler. + */ + next_skip_pfn += 1UL << cc->order; + } } /* @@ -834,10 +907,10 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, /* Scan block by block. First and last block may be incomplete */ pfn = start_pfn; - block_start_pfn = pfn & ~(pageblock_nr_pages - 1); + block_start_pfn = pageblock_start_pfn(pfn); if (block_start_pfn < cc->zone->zone_start_pfn) block_start_pfn = cc->zone->zone_start_pfn; - block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = pageblock_end_pfn(pfn); for (; pfn < end_pfn; pfn = block_end_pfn, block_start_pfn = block_end_pfn, @@ -924,10 +997,10 @@ static void isolate_freepages(struct compact_control *cc) * is using. */ isolate_start_pfn = cc->free_pfn; - block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1); + block_start_pfn = pageblock_start_pfn(cc->free_pfn); block_end_pfn = min(block_start_pfn + pageblock_nr_pages, zone_end_pfn(zone)); - low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); + low_pfn = pageblock_end_pfn(cc->migrate_pfn); /* * Isolate free pages until enough are available to migrate the @@ -1070,7 +1143,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, unsigned long block_start_pfn; unsigned long block_end_pfn; unsigned long low_pfn; - unsigned long isolate_start_pfn; struct page *page; const isolate_mode_t isolate_mode = (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) | @@ -1081,12 +1153,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, * initialized by compact_zone() */ low_pfn = cc->migrate_pfn; - block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1); + block_start_pfn = pageblock_start_pfn(low_pfn); if (block_start_pfn < zone->zone_start_pfn) block_start_pfn = zone->zone_start_pfn; /* Only scan within a pageblock boundary */ - block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); + block_end_pfn = pageblock_end_pfn(low_pfn); /* * Iterate over whole pageblocks until we find the first suitable. @@ -1125,7 +1197,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, continue; /* Perform the isolation */ - isolate_start_pfn = low_pfn; low_pfn = isolate_migratepages_block(cc, low_pfn, block_end_pfn, isolate_mode); @@ -1135,15 +1206,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } /* - * Record where we could have freed pages by migration and not - * yet flushed them to buddy allocator. - * - this is the lowest page that could have been isolated and - * then freed by migration. - */ - if (cc->nr_migratepages && !cc->last_migrated_pfn) - cc->last_migrated_pfn = isolate_start_pfn; - - /* * Either we isolated something and proceed with migration. Or * we failed and compact_zone should decide if we should * continue or not. @@ -1167,7 +1229,7 @@ static inline bool is_via_compact_memory(int order) return order == -1; } -static int __compact_finished(struct zone *zone, struct compact_control *cc, +static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc, const int migratetype) { unsigned int order; @@ -1190,7 +1252,10 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, if (cc->direct_compaction) zone->compact_blockskip_flush = true; - return COMPACT_COMPLETE; + if (cc->whole_zone) + return COMPACT_COMPLETE; + else + return COMPACT_PARTIAL_SKIPPED; } if (is_via_compact_memory(cc->order)) @@ -1230,8 +1295,9 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, return COMPACT_NO_SUITABLE_PAGE; } -static int compact_finished(struct zone *zone, struct compact_control *cc, - const int migratetype) +static enum compact_result compact_finished(struct zone *zone, + struct compact_control *cc, + const int migratetype) { int ret; @@ -1250,8 +1316,10 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, * COMPACT_PARTIAL - If the allocation would succeed without compaction * COMPACT_CONTINUE - If compaction should run now */ -static unsigned long __compaction_suitable(struct zone *zone, int order, - int alloc_flags, int classzone_idx) +static enum compact_result __compaction_suitable(struct zone *zone, int order, + unsigned int alloc_flags, + int classzone_idx, + unsigned long wmark_target) { int fragindex; unsigned long watermark; @@ -1274,7 +1342,8 @@ static unsigned long __compaction_suitable(struct zone *zone, int order, * allocated and for a short time, the footprint is higher */ watermark += (2UL << order); - if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags)) + if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, + alloc_flags, wmark_target)) return COMPACT_SKIPPED; /* @@ -1295,12 +1364,14 @@ static unsigned long __compaction_suitable(struct zone *zone, int order, return COMPACT_CONTINUE; } -unsigned long compaction_suitable(struct zone *zone, int order, - int alloc_flags, int classzone_idx) +enum compact_result compaction_suitable(struct zone *zone, int order, + unsigned int alloc_flags, + int classzone_idx) { - unsigned long ret; + enum compact_result ret; - ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx); + ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx, + zone_page_state(zone, NR_FREE_PAGES)); trace_mm_compaction_suitable(zone, order, ret); if (ret == COMPACT_NOT_SUITABLE_ZONE) ret = COMPACT_SKIPPED; @@ -1308,9 +1379,42 @@ unsigned long compaction_suitable(struct zone *zone, int order, return ret; } -static int compact_zone(struct zone *zone, struct compact_control *cc) +bool compaction_zonelist_suitable(struct alloc_context *ac, int order, + int alloc_flags) { - int ret; + struct zone *zone; + struct zoneref *z; + + /* + * Make sure at least one zone would pass __compaction_suitable if we continue + * retrying the reclaim. + */ + for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, + ac->nodemask) { + unsigned long available; + enum compact_result compact_result; + + /* + * Do not consider all the reclaimable memory because we do not + * want to trash just for a single high order allocation which + * is even not guaranteed to appear even if __compaction_suitable + * is happy about the watermark check. + */ + available = zone_reclaimable_pages(zone) / order; + available += zone_page_state_snapshot(zone, NR_FREE_PAGES); + compact_result = __compaction_suitable(zone, order, alloc_flags, + ac_classzone_idx(ac), available); + if (compact_result != COMPACT_SKIPPED && + compact_result != COMPACT_NOT_SUITABLE_ZONE) + return true; + } + + return false; +} + +static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc) +{ + enum compact_result ret; unsigned long start_pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); const int migratetype = gfpflags_to_migratetype(cc->gfp_mask); @@ -1318,15 +1422,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) ret = compaction_suitable(zone, cc->order, cc->alloc_flags, cc->classzone_idx); - switch (ret) { - case COMPACT_PARTIAL: - case COMPACT_SKIPPED: - /* Compaction is likely to fail */ + /* Compaction is likely to fail */ + if (ret == COMPACT_PARTIAL || ret == COMPACT_SKIPPED) return ret; - case COMPACT_CONTINUE: - /* Fall through to compaction */ - ; - } + + /* huh, compaction_suitable is returning something unexpected */ + VM_BUG_ON(ret != COMPACT_CONTINUE); /* * Clear pageblock skip if there were failures recently and compaction @@ -1343,7 +1444,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; cc->free_pfn = zone->compact_cached_free_pfn; if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { - cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages); + cc->free_pfn = pageblock_start_pfn(end_pfn - 1); zone->compact_cached_free_pfn = cc->free_pfn; } if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { @@ -1351,6 +1452,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; } + + if (cc->migrate_pfn == start_pfn) + cc->whole_zone = true; + cc->last_migrated_pfn = 0; trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, @@ -1398,6 +1503,18 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) ret = COMPACT_CONTENDED; goto out; } + /* + * We failed to migrate at least one page in the current + * order-aligned block, so skip the rest of it. + */ + if (cc->direct_compaction && + (cc->mode == MIGRATE_ASYNC)) { + cc->migrate_pfn = block_end_pfn( + cc->migrate_pfn - 1, cc->order); + /* Draining pcplists is useless in this case */ + cc->last_migrated_pfn = 0; + + } } check_drain: @@ -1411,7 +1528,7 @@ check_drain: if (cc->order > 0 && cc->last_migrated_pfn) { int cpu; unsigned long current_block_start = - cc->migrate_pfn & ~((1UL << cc->order) - 1); + block_start_pfn(cc->migrate_pfn, cc->order); if (cc->last_migrated_pfn < current_block_start) { cpu = get_cpu(); @@ -1436,7 +1553,7 @@ out: cc->nr_freepages = 0; VM_BUG_ON(free_pfn == 0); /* The cached pfn is always the first in a pageblock */ - free_pfn &= ~(pageblock_nr_pages-1); + free_pfn = pageblock_start_pfn(free_pfn); /* * Only go back, not forward. The cached pfn might have been * already reset to zone end in compact_finished() @@ -1454,11 +1571,11 @@ out: return ret; } -static unsigned long compact_zone_order(struct zone *zone, int order, +static enum compact_result compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx) + unsigned int alloc_flags, int classzone_idx) { - unsigned long ret; + enum compact_result ret; struct compact_control cc = { .nr_freepages = 0, .nr_migratepages = 0, @@ -1496,15 +1613,15 @@ int sysctl_extfrag_threshold = 500; * * This is the main entry point for direct page compaction. */ -unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, - int alloc_flags, const struct alloc_context *ac, - enum migrate_mode mode, int *contended) +enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, + unsigned int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended) { int may_enter_fs = gfp_mask & __GFP_FS; int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; struct zone *zone; - int rc = COMPACT_DEFERRED; + enum compact_result rc = COMPACT_SKIPPED; int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */ *contended = COMPACT_CONTENDED_NONE; @@ -1518,15 +1635,17 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) { - int status; + enum compact_result status; int zone_contended; - if (compaction_deferred(zone, order)) + if (compaction_deferred(zone, order)) { + rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); continue; + } status = compact_zone_order(zone, order, gfp_mask, mode, &zone_contended, alloc_flags, - ac->classzone_idx); + ac_classzone_idx(ac)); rc = max(status, rc); /* * It takes at least one zone that wasn't lock contended @@ -1536,7 +1655,7 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), - ac->classzone_idx, alloc_flags)) { + ac_classzone_idx(ac), alloc_flags)) { /* * We think the allocation will succeed in this zone, * but it is not certain, hence the false. The caller @@ -1558,7 +1677,8 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, goto break_loop; } - if (mode != MIGRATE_ASYNC && status == COMPACT_COMPLETE) { + if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE || + status == COMPACT_PARTIAL_SKIPPED)) { /* * We think that allocation won't succeed in this zone * so we defer compaction there. If it ends up @@ -1593,7 +1713,7 @@ break_loop: * If at least one zone wasn't deferred or skipped, we report if all * zones that were tried were lock contended. */ - if (rc > COMPACT_SKIPPED && all_zones_contended) + if (rc > COMPACT_INACTIVE && all_zones_contended) *contended = COMPACT_CONTENDED_LOCK; return rc; @@ -1742,7 +1862,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) struct zone *zone; enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx; - for (zoneid = 0; zoneid < classzone_idx; zoneid++) { + for (zoneid = 0; zoneid <= classzone_idx; zoneid++) { zone = &pgdat->node_zones[zoneid]; if (!populated_zone(zone)) @@ -1777,7 +1897,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) cc.classzone_idx); count_vm_event(KCOMPACTD_WAKE); - for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) { + for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) { int status; zone = &pgdat->node_zones[zoneid]; @@ -1805,7 +1925,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) cc.classzone_idx, 0)) { success = true; compaction_defer_reset(zone, cc.order, false); - } else if (status == COMPACT_COMPLETE) { + } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { /* * We use sync migration mode here, so we defer like * sync direct compaction does. |