summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-10-09 01:32:47 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 09:22:51 +0200
commit62997027ca5b3d4618198ed8b1aba40b61b1137b (patch)
treecf26352e091ae10f7201d98ca774a8c0e5f8cdfd /include
parentmm: compaction: Restart compaction from near where it left off (diff)
downloadlinux-62997027ca5b3d4618198ed8b1aba40b61b1137b.tar.xz
linux-62997027ca5b3d4618198ed8b1aba40b61b1137b.zip
mm: compaction: clear PG_migrate_skip based on compaction and reclaim activity
Compaction caches if a pageblock was scanned and no pages were isolated so that the pageblocks can be skipped in the future to reduce scanning. This information is not cleared by the page allocator based on activity due to the impact it would have to the page allocator fast paths. Hence there is a requirement that something clear the cache or pageblocks will be skipped forever. Currently the cache is cleared if there were a number of recent allocation failures and it has not been cleared within the last 5 seconds. Time-based decisions like this are terrible as they have no relationship to VM activity and is basically a big hammer. Unfortunately, accurate heuristics would add cost to some hot paths so this patch implements a rough heuristic. There are two cases where the cache is cleared. 1. If a !kswapd process completes a compaction cycle (migrate and free scanner meet), the zone is marked compact_blockskip_flush. When kswapd goes to sleep, it will clear the cache. This is expected to be the common case where the cache is cleared. It does not really matter if kswapd happens to be asleep or going to sleep when the flag is set as it will be woken on the next allocation request. 2. If there have been multiple failures recently and compaction just finished being deferred then a process will clear the cache and start a full scan. This situation happens if there are multiple high-order allocation requests under heavy memory pressure. The clearing of the PG_migrate_skip bits and other scans is inherently racy but the race is harmless. For allocations that can fail such as THP, they will simply fail. For requests that cannot fail, they will retry the allocation. Tests indicated that scanning rates were roughly similar to when the time-based heuristic was used and the allocation success rates were similar. Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Richard Davies <richard@arachsys.com> Cc: Shaohua Li <shli@kernel.org> Cc: Avi Kivity <avi@redhat.com> Cc: Rafael Aquini <aquini@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/compaction.h15
-rw-r--r--include/linux/mmzone.h3
2 files changed, 17 insertions, 1 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 0e38a1deeb23..6ecb6dc2f303 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -24,6 +24,7 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask,
bool sync, bool *contended, struct page **page);
extern int compact_pgdat(pg_data_t *pgdat, int order);
+extern void reset_isolation_suitable(pg_data_t *pgdat);
extern unsigned long compaction_suitable(struct zone *zone, int order);
/* Do not skip compaction more than 64 times */
@@ -61,6 +62,16 @@ static inline bool compaction_deferred(struct zone *zone, int order)
return zone->compact_considered < defer_limit;
}
+/* Returns true if restarting compaction after many failures */
+static inline bool compaction_restarting(struct zone *zone, int order)
+{
+ if (order < zone->compact_order_failed)
+ return false;
+
+ return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
+ zone->compact_considered >= 1UL << zone->compact_defer_shift;
+}
+
#else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
@@ -74,6 +85,10 @@ static inline int compact_pgdat(pg_data_t *pgdat, int order)
return COMPACT_CONTINUE;
}
+static inline void reset_isolation_suitable(pg_data_t *pgdat)
+{
+}
+
static inline unsigned long compaction_suitable(struct zone *zone, int order)
{
return COMPACT_SKIPPED;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c8b3abc97a1e..d240efa8f846 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -370,7 +370,8 @@ struct zone {
spinlock_t lock;
int all_unreclaimable; /* All pages pinned */
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
- unsigned long compact_blockskip_expire;
+ /* Set to true when the PG_migrate_skip bits should be cleared */
+ bool compact_blockskip_flush;
/* pfns where compaction scanners should start */
unsigned long compact_cached_free_pfn;