summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2011-07-01 21:31:25 +0200
committer: Wu Fengguang <fengguang.wu@intel.com> 2011-07-24 04:51:52 +0200
commit: bcff25fc8aa47a13faff8b4b992589813f7b450a (patch)
tree: ae93e2b8ba1417bf6327f79154c69b9afc8328bb
parent: writeback: don't busy retry writeback on new/freeing inodes (diff)
download: linux-bcff25fc8aa47a13faff8b4b992589813f7b450a.tar.xz
download: linux-bcff25fc8aa47a13faff8b4b992589813f7b450a.zip
mm: properly reflect task dirty limits in dirty_exceeded logic
We set bdi->dirty_exceeded (and thus ratelimiting code starts to call balance_dirty_pages() every 8 pages) when a per-bdi limit is exceeded or global limit is exceeded. But per-bdi limit also depends on the task. Thus different tasks reach the limit on that bdi at different levels of dirty pages. The result is that with the current code bdi->dirty_exceeded ping-ponged between 1 and 0 depending on which task just got into balance_dirty_pages().

We fix the issue by clearing bdi->dirty_exceeded only when per-bdi amount of dirty pages drops below the threshold (7/8 * bdi_dirty_limit) where task limits already do not have any influence.

Impact: The end result is that the dirty pages are kept more tightly under control, with the average number slightly lower than before. This reduces the risk of throttling light dirtiers and hence makes the system more responsive. However, it may add overhead by enforcing balance_dirty_pages() calls on every 8 pages when there are 2+ heavy dirtiers.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: Christoph Hellwig <hch@infradead.org>
CC: Dave Chinner <david@fromorbit.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
-rw-r--r-- mm/page-writeback.c 26
1 files changed, 20 insertions, 6 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f9d9f5476d58..1d781803e629 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -281,12 +281,13 @@ static inline void task_dirties_fraction(struct task_struct *tsk,
* effectively curb the growth of dirty pages. Light dirtiers with high enough
* dirty threshold may never get throttled.
*/
+#define TASK_LIMIT_FRACTION 8
static unsigned long task_dirty_limit(struct task_struct *tsk,
unsigned long bdi_dirty)
{
long numerator, denominator;
unsigned long dirty = bdi_dirty;
- u64 inv = dirty >> 3;
+ u64 inv = dirty / TASK_LIMIT_FRACTION;
task_dirties_fraction(tsk, &numerator, &denominator);
inv *= numerator;
@@ -297,6 +298,12 @@ static unsigned long task_dirty_limit(struct task_struct *tsk,
return max(dirty, bdi_dirty/2);
}
+/* Minimum limit for any task */
+static unsigned long task_min_dirty_limit(unsigned long bdi_dirty)
+{
+ return bdi_dirty - bdi_dirty / TASK_LIMIT_FRACTION;
+}
+
/*
*
*/
@@ -651,9 +658,12 @@ static void balance_dirty_pages(struct address_space *mapping,
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
+ unsigned long task_bdi_thresh;
+ unsigned long min_task_bdi_thresh;
unsigned long pages_written = 0;
unsigned long pause = 1;
bool dirty_exceeded = false;
+ bool clear_dirty_exceeded = true;
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long start_time = jiffies;
@@ -673,7 +683,8 @@ static void balance_dirty_pages(struct address_space *mapping,
break;
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
- bdi_thresh = task_dirty_limit(current, bdi_thresh);
+ min_task_bdi_thresh = task_min_dirty_limit(bdi_thresh);
+ task_bdi_thresh = task_dirty_limit(current, bdi_thresh);
/*
* In order to avoid the stacked BDI deadlock we need
@@ -685,7 +696,7 @@ static void balance_dirty_pages(struct address_space *mapping,
* actually dirty; with m+n sitting in the percpu
* deltas.
*/
- if (bdi_thresh < 2*bdi_stat_error(bdi)) {
+ if (task_bdi_thresh < 2 * bdi_stat_error(bdi)) {
bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
bdi_dirty = bdi_nr_reclaimable +
bdi_stat_sum(bdi, BDI_WRITEBACK);
@@ -701,8 +712,10 @@ static void balance_dirty_pages(struct address_space *mapping,
* bdi or process from holding back light ones; The latter is
* the last resort safeguard.
*/
- dirty_exceeded = (bdi_dirty > bdi_thresh) ||
+ dirty_exceeded = (bdi_dirty > task_bdi_thresh) ||
(nr_dirty > dirty_thresh);
+ clear_dirty_exceeded = (bdi_dirty <= min_task_bdi_thresh) &&
+ (nr_dirty <= dirty_thresh);
if (!dirty_exceeded)
break;
@@ -723,7 +736,7 @@ static void balance_dirty_pages(struct address_space *mapping,
* up.
*/
trace_balance_dirty_start(bdi);
- if (bdi_nr_reclaimable > bdi_thresh) {
+ if (bdi_nr_reclaimable > task_bdi_thresh) {
pages_written += writeback_inodes_wb(&bdi->wb,
write_chunk);
trace_balance_dirty_written(bdi, pages_written);
@@ -766,7 +779,8 @@ static void balance_dirty_pages(struct address_space *mapping,
pause = HZ / 10;
}
- if (!dirty_exceeded && bdi->dirty_exceeded)
+ /* Clear dirty_exceeded flag only when no task can exceed the limit */
+ if (clear_dirty_exceeded && bdi->dirty_exceeded)
bdi->dirty_exceeded = 0;
if (writeback_in_progress(bdi))