diff options
author | Mel Gorman <mgorman@techsingularity.net> | 2021-11-05 21:42:42 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-06 21:30:40 +0100 |
commit | c3f4a9a2b082c5392fbff17c6d8551154add5fdb (patch) | |
tree | 94211c0936f4a7fcadcc0ab42fd030a6cffd61e9 | |
parent | mm/page_alloc: remove the throttling logic from the page allocator (diff) | |
download | linux-c3f4a9a2b082c5392fbff17c6d8551154add5fdb.tar.xz linux-c3f4a9a2b082c5392fbff17c6d8551154add5fdb.zip |
mm/vmscan: centralise timeout values for reclaim_throttle
Neil Brown raised concerns about callers of reclaim_throttle specifying
a timeout value. The original timeout values to congestion_wait() were
probably pulled out of thin air or copy&pasted from somewhere else.
This patch centralises the timeout values and selects a timeout based on
the reason for reclaim throttling. These figures are also pulled out of
the same thin air but better values may be derived
Running a workload that is throttling for inappropriate periods and
tracing mm_vmscan_throttled can be used to pick a more appropriate
value. Excessive throttling would pick a lower timeout where as
excessive CPU usage in reclaim context would select a larger timeout.
Ideally a large value would always be used and the wakeups would occur
before a timeout but that requires careful testing.
Link: https://lkml.kernel.org/r/20211022144651.19914-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to '')
-rw-r--r-- | mm/compaction.c | 2 | ||||
-rw-r--r-- | mm/internal.h | 3 | ||||
-rw-r--r-- | mm/page-writeback.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 50 |
4 files changed, 40 insertions, 17 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 7359093d8ac0..151b04c4dab3 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (cc->mode == MIGRATE_ASYNC) return -EAGAIN; - reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); if (fatal_signal_pending(current)) return -EINTR; diff --git a/mm/internal.h b/mm/internal.h index 7dfe74f827bf..f3de3a2f3e30 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn; */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); -extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, - long timeout); +extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); /* * in mm/rmap.c: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f34f54fcd5b4..4b01a6872f9e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) * guess as any. */ reclaim_throttle(NODE_DATA(numa_node_id()), - VMSCAN_THROTTLE_WRITEBACK, HZ/50); + VMSCAN_THROTTLE_WRITEBACK); } /* * Usually few pages are written by now from those we've just submitted diff --git a/mm/vmscan.c b/mm/vmscan.c index 7d3fe5938e3b..599e5616b123 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } -void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, - long timeout) +void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) { wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; - long ret; - bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK); + long timeout, ret; DEFINE_WAIT(wait); /* @@ -1023,17 +1021,43 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, current->flags & (PF_IO_WORKER|PF_KTHREAD)) return; - if (acct_writeback && - atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { - WRITE_ONCE(pgdat->nr_reclaim_start, - node_page_state(pgdat, NR_THROTTLED_WRITTEN)); + /* + * These figures are pulled out of thin air. + * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many + * parallel reclaimers which is a short-lived event so the timeout is + * short. Failing to make progress or waiting on writeback are + * potentially long-lived events so use a longer timeout. This is shaky + * logic as a failure to make progress could be due to anything from + * writeback to a slow device to excessive references pages at the tail + * of the inactive LRU. + */ + switch(reason) { + case VMSCAN_THROTTLE_WRITEBACK: + timeout = HZ/10; + + if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { + WRITE_ONCE(pgdat->nr_reclaim_start, + node_page_state(pgdat, NR_THROTTLED_WRITTEN)); + } + + break; + case VMSCAN_THROTTLE_NOPROGRESS: + timeout = HZ/10; + break; + case VMSCAN_THROTTLE_ISOLATED: + timeout = HZ/50; + break; + default: + WARN_ON_ONCE(1); + timeout = HZ; + break; } prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = schedule_timeout(timeout); finish_wait(wqh, &wait); - if (acct_writeback) + if (reason == VMSCAN_THROTTLE_WRITEBACK) atomic_dec(&pgdat->nr_writeback_throttled); trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), @@ -2318,7 +2342,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, /* wait a bit for the reclaimer. */ stalled = true; - reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -3250,7 +3274,7 @@ again: * until some pages complete writeback. */ if (sc->nr.immediate) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); } /* @@ -3274,7 +3298,7 @@ again: if (!current_is_kswapd() && current_may_throttle() && !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc)) @@ -3346,7 +3370,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) /* Throttle if making no progress at high prioities. */ if (sc->priority < DEF_PRIORITY - 2) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS); } /* |