Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  |  274
1 file changed, 131 insertions(+), 143 deletions(-)
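
For quick reference, struct scan_control as it reads once the hunks below are applied. This is a sketch assembled from this diff only; the one target_mem_cgroup comment line that falls outside the hunk context is completed from the surrounding kernel source of that era, so treat it as an assumption rather than part of the patch:

struct scan_control {
	/* How many pages shrink_list() should reclaim */
	unsigned long nr_to_reclaim;

	/* This context's GFP mask */
	gfp_t gfp_mask;

	/* Allocation order */
	int order;

	/*
	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
	 * are scanned.
	 */
	nodemask_t *nodemask;

	/*
	 * The memory cgroup that hit its limit and as a result is the
	 * primary target of this reclaim invocation.
	 */
	struct mem_cgroup *target_mem_cgroup;

	/* Scan (total_size >> priority) pages at once */
	int priority;

	unsigned int may_writepage:1;

	/* Can mapped pages be reclaimed? */
	unsigned int may_unmap:1;

	/* Can pages be swapped as part of reclaim? */
	unsigned int may_swap:1;

	unsigned int hibernation_mode:1;

	/* One of the zones is ready for compaction */
	unsigned int compaction_ready:1;

	/* Incremented by the number of inactive pages that were scanned */
	unsigned long nr_scanned;

	/* Number of pages freed so far during a call to shrink_zones() */
	unsigned long nr_reclaimed;
};

Note how the reclaim-policy inputs (nr_to_reclaim, gfp_mask, order, nodemask, target_mem_cgroup, priority, the :1 flags) are grouped ahead of the result counters (nr_scanned, nr_reclaimed), and the former int flags become single-bit fields.
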
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..d2f65c856350 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
#include <trace/events/vmscan.h>
struct scan_control {
- /* Incremented by the number of inactive pages that were scanned */
- unsigned long nr_scanned;
-
- /* Number of pages freed so far during a call to shrink_zones() */
- unsigned long nr_reclaimed;
-
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
- unsigned long hibernation_mode;
-
/* This context's GFP mask */
gfp_t gfp_mask;
- int may_writepage;
-
- /* Can mapped pages be reclaimed? */
- int may_unmap;
-
- /* Can pages be swapped as part of reclaim? */
- int may_swap;
-
+ /* Allocation order */
int order;
- /* Scan (total_size >> priority) pages at once */
- int priority;
-
- /* anon vs. file LRUs scanning "ratio" */
- int swappiness;
+ /*
+ * Nodemask of nodes allowed by the caller. If NULL, all nodes
+ * are scanned.
+ */
+ nodemask_t *nodemask;
/*
* The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
*/
struct mem_cgroup *target_mem_cgroup;
- /*
- * Nodemask of nodes allowed by the caller. If NULL, all nodes
- * are scanned.
- */
- nodemask_t *nodemask;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
+
+ unsigned int may_writepage:1;
+
+ /* Can mapped pages be reclaimed? */
+ unsigned int may_unmap:1;
+
+ /* Can pages be swapped as part of reclaim? */
+ unsigned int may_swap:1;
+
+ unsigned int hibernation_mode:1;
+
+ /* One of the zones is ready for compaction */
+ unsigned int compaction_ready:1;
+
+ /* Incremented by the number of inactive pages that were scanned */
+ unsigned long nr_scanned;
+
+ /* Number of pages freed so far during a call to shrink_zones() */
+ unsigned long nr_reclaimed;
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
-unsigned long vm_total_pages; /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
bool zone_reclaimable(struct zone *zone)
{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+ return zone_page_state(zone, NR_PAGES_SCANNED) <
+ zone_reclaimable_pages(zone) * 6;
}
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1503,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
if (global_reclaim(sc)) {
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -1750,7 +1756,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
* Count referenced pages from currently used mappings as rotated,
* even though only some of them are actually re-activated. This
* helps balance scan pressure between file and anonymous pages in
- * get_scan_ratio.
+ * get_scan_count.
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
@@ -1865,8 +1871,8 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
- unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc, unsigned long *nr)
{
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !sc->swappiness) {
+ if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1919,16 +1925,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && sc->swappiness) {
+ if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
- anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
- get_lru_size(lruvec, LRU_INACTIVE_ANON);
- file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
- get_lru_size(lruvec, LRU_INACTIVE_FILE);
-
/*
* Prevent the reclaimer from falling into the cache trap: as
* cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1940,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* anon pages. Try to detect this based on file LRU size.
*/
if (global_reclaim(sc)) {
- unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+ unsigned long zonefile;
+ unsigned long zonefree;
- if (unlikely(file + free <= high_wmark_pages(zone))) {
+ zonefree = zone_page_state(zone, NR_FREE_PAGES);
+ zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+
+ if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
scan_balance = SCAN_ANON;
goto out;
}
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = sc->swappiness;
+ anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
@@ -1976,6 +1982,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
+
+ anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+ get_lru_size(lruvec, LRU_INACTIVE_ANON);
+ file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+ get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
spin_lock_irq(&zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2064,8 @@ out:
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2076,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, sc, nr);
+ get_scan_count(lruvec, swappiness, sc, nr);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2254,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
+ bool reclaimable = false;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2273,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
struct lruvec *lruvec;
+ int swappiness;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ swappiness = mem_cgroup_swappiness(memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc);
+ shrink_lruvec(lruvec, swappiness, sc);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2302,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
+ if (sc->nr_reclaimed - nr_reclaimed)
+ reclaimable = true;
+
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return reclaimable;
}
/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
/*
* Compaction takes time to run and there are potentially other
* callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2325,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
*/
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, sc->order))
+ if (compaction_deferred(zone, order))
return watermark_ok;
/* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
+ if (!compaction_suitable(zone, order))
return false;
return watermark_ok;
@@ -2342,10 +2358,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
*/
static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
@@ -2354,13 +2367,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long lru_pages = 0;
- bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+ bool reclaimable = false;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2404,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (sc->priority != DEF_PRIORITY &&
!zone_reclaimable(zone))
continue; /* Let kswapd poll it */
- if (IS_ENABLED(CONFIG_COMPACTION)) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge
- * page allocations.
- */
- if ((zonelist_zone_idx(z) <= requested_highidx)
- && compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
+
+ /*
+ * If we already have plenty of memory free for
+ * compaction in this zone, don't free any more.
+ * Even though compaction is invoked for any
+ * non-zero order, only frequent costly order
+ * reclamation is disruptive enough to become a
+ * noticeable problem, like transparent huge
+ * page allocations.
+ */
+ if (IS_ENABLED(CONFIG_COMPACTION) &&
+ sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ zonelist_zone_idx(z) <= requested_highidx &&
+ compaction_ready(zone, sc->order)) {
+ sc->compaction_ready = true;
+ continue;
}
+
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
@@ -2419,10 +2434,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
+ if (nr_soft_reclaimed)
+ reclaimable = true;
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(zone, sc);
+ if (shrink_zone(zone, sc))
+ reclaimable = true;
+
+ if (global_reclaim(sc) &&
+ !reclaimable && zone_reclaimable(zone))
+ reclaimable = true;
}
/*
@@ -2445,27 +2467,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
*/
sc->gfp_mask = orig_mask;
- return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
- struct scan_control *sc)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(sc->gfp_mask), sc->nodemask) {
- if (!populated_zone(zone))
- continue;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- continue;
- if (zone_reclaimable(zone))
- return false;
- }
-
- return true;
+ return reclaimable;
}
/*
@@ -2489,7 +2491,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
{
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ bool zones_reclaimable;
delayacct_freepages_start();
@@ -2500,11 +2502,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(zonelist, sc);
+ zones_reclaimable = shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
- goto out;
+ break;
+
+ if (sc->compaction_ready)
+ break;
/*
* If we're getting trouble reclaiming, start doing
@@ -2526,28 +2531,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
WB_REASON_TRY_TO_FREE_PAGES);
sc->may_writepage = 1;
}
- } while (--sc->priority >= 0 && !aborted_reclaim);
+ } while (--sc->priority >= 0);
-out:
delayacct_freepages_end();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
- /*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
- * check.
- */
- if (oom_killer_disabled)
- return 0;
-
/* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
+ if (sc->compaction_ready)
return 1;
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+ /* Any of the zones still reclaimable? Don't OOM. */
+ if (zones_reclaimable)
return 1;
return 0;
@@ -2684,15 +2680,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
{
unsigned long nr_reclaimed;
struct scan_control sc = {
+ .nr_to_reclaim = SWAP_CLUSTER_MAX,
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+ .order = order,
+ .nodemask = nodemask,
+ .priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
- .nr_to_reclaim = SWAP_CLUSTER_MAX,
.may_unmap = 1,
.may_swap = 1,
- .order = order,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = NULL,
- .nodemask = nodemask,
};
/*
@@ -2722,17 +2717,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
unsigned long *nr_scanned)
{
struct scan_control sc = {
- .nr_scanned = 0,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
+ .target_mem_cgroup = memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = !noswap,
- .order = 0,
- .priority = 0,
- .swappiness = mem_cgroup_swappiness(memcg),
- .target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ int swappiness = mem_cgroup_swappiness(memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2740,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, &sc);
+ shrink_lruvec(lruvec, swappiness, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2764,16 +2756,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_reclaimed;
int nid;
struct scan_control sc = {
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- .may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
- .order = 0,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = memcg,
- .nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ .target_mem_cgroup = memcg,
+ .priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = !noswap,
};
/*
@@ -3031,12 +3021,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
+ .order = order,
.priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
- .may_writepage = !laptop_mode,
- .order = order,
- .target_mem_cgroup = NULL,
};
count_vm_event(PAGEOUTRUN);
@@ -3417,14 +3406,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
.gfp_mask = GFP_HIGHUSER_MOVABLE,
- .may_swap = 1,
- .may_unmap = 1,
+ .priority = DEF_PRIORITY,
.may_writepage = 1,
- .nr_to_reclaim = nr_to_reclaim,
+ .may_unmap = 1,
+ .may_swap = 1,
.hibernation_mode = 1,
- .order = 0,
- .priority = DEF_PRIORITY,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
@@ -3604,13 +3592,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
struct task_struct *p = current;
struct reclaim_state reclaim_state;
struct scan_control sc = {
- .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
- .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
- .may_swap = 1,
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
.priority = ZONE_RECLAIM_PRIORITY,
+ .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+ .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+ .may_swap = 1,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,