summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWu Fengguang <fengguang.wu@intel.com>2011-04-05 21:21:19 +0200
committerWu Fengguang <fengguang.wu@intel.com>2011-12-18 07:20:20 +0100
commit54848d73f9f254631303d6eab9b976855988b266 (patch)
tree9fb4b7e564f2c0df88d0bde2f482b9b7efc847fa
parentwriteback: Include all dirty inodes in background writeback (diff)
downloadlinux-54848d73f9f254631303d6eab9b976855988b266.tar.xz
linux-54848d73f9f254631303d6eab9b976855988b266.zip
writeback: charge leaked page dirties to active tasks
It's a years long problem that a large number of short-lived dirtiers (eg. gcc instances in a fast kernel build) may starve long-run dirtiers (eg. dd) as well as pushing the dirty pages to the global hard limit. The solution is to charge the pages dirtied by the exited gcc to the other random dirtying tasks. It sounds not perfect, however should behave good enough in practice, seeing as that throttled tasks aren't actually running so those that are running are more likely to pick it up and get throttled, therefore promoting an equal spread. Randy: fix compile error: 'dirty_throttle_leaks' undeclared in exit.c Acked-by: Jan Kara <jack@suse.cz> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
-rw-r--r--include/linux/writeback.h2
-rw-r--r--kernel/exit.c3
-rw-r--r--mm/page-writeback.c27
3 files changed, 32 insertions, 0 deletions
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a378c295851f..05eaf5e3aad7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,8 @@
#include <linux/sched.h>
#include <linux/fs.h>
+DECLARE_PER_CPU(int, dirty_throttle_leaks);
+
/*
* The 1/4 region under the global dirty thresh is for smooth dirty throttling:
*
diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f873..d4aac24cc469 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -51,6 +51,7 @@
#include <trace/events/sched.h>
#include <linux/hw_breakpoint.h>
#include <linux/oom.h>
+#include <linux/writeback.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -1037,6 +1038,8 @@ NORET_TYPE void do_exit(long code)
validate_creds_for_do_exit(tsk);
preempt_disable();
+ if (tsk->nr_dirtied)
+ __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
exit_rcu();
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50f08241f981..619c445fc03c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1214,6 +1214,22 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
static DEFINE_PER_CPU(int, bdp_ratelimits);
+/*
+ * Normal tasks are throttled by
+ * loop {
+ * dirty tsk->nr_dirtied_pause pages;
+ * take a snap in balance_dirty_pages();
+ * }
+ * However there is a worst case. If every task exit immediately when dirtied
+ * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
+ * called to throttle the page dirties. The solution is to save the not yet
+ * throttled page dirties in dirty_throttle_leaks on task exit and charge them
+ * randomly into the running tasks. This works well for the above worst case,
+ * as the new task will pick up and accumulate the old task's leaked dirty
+ * count and eventually get throttled.
+ */
+DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
+
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -1261,6 +1277,17 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
ratelimit = 0;
}
}
+ /*
+ * Pick up the dirtied pages by the exited tasks. This avoids lots of
+ * short-lived tasks (eg. gcc invocations in a kernel build) escaping
+ * the dirty throttling and livelock other long-run dirtiers.
+ */
+ p = &__get_cpu_var(dirty_throttle_leaks);
+ if (*p > 0 && current->nr_dirtied < ratelimit) {
+ nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
+ *p -= nr_pages_dirtied;
+ current->nr_dirtied += nr_pages_dirtied;
+ }
preempt_enable();
if (unlikely(current->nr_dirtied >= ratelimit))