sched/balancing: Periodically decay max cost of idle balance

This patch builds on patch 2 and periodically decays each sched domain's max cost of idle balancing by approximately 1% per second. It also decays the rq's max_idle_balance_cost value.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Jason Low <jason.low2@hp.com> 2013-09-13 20:26:53 +0200
committer: Ingo Molnar <mingo@kernel.org> 2013-09-20 12:03:46 +0200
commit: f48627e686a69f5215cb0761e731edb3d9859dd9 (patch)
tree: 510f55a0971da16bdb2922da2c0ebad47bdc5e3d /kernel
parent: sched/balancing: Consider max cost of idle balance per sched domain (diff)
download: linux-f48627e686a69f5215cb0761e731edb3d9859dd9.tar.xz
linux-f48627e686a69f5215cb0761e731edb3d9859dd9.zip
1 file changed, 31 insertions, 7 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ffc99d8f0a95..2b89cd244b0d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
-	int need_serialize;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
 
 	update_blocked_averages(cpu);
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
 		interval = sd->balance_interval;
 		if (idle != CPU_IDLE)
 			interval *= sd->busy_factor;
@@ -5723,14 +5747,14 @@ out:
 			next_balance = sd->last_balance + interval;
 			update_next_balance = 1;
 		}
-
+	}
+	if (need_decay) {
 		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
 		 */
-		if (!continue_balancing)
-			break;
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
 	}
 	rcu_read_unlock();
author	Jason Low <jason.low2@hp.com>	2013-09-13 20:26:53 +0200
committer	Ingo Molnar <mingo@kernel.org>	2013-09-20 12:03:46 +0200
commit	f48627e686a69f5215cb0761e731edb3d9859dd9 (patch)
tree	510f55a0971da16bdb2922da2c0ebad47bdc5e3d /kernel
parent	sched/balancing: Consider max cost of idle balance per sched domain (diff)
download	linux-f48627e686a69f5215cb0761e731edb3d9859dd9.tar.xz linux-f48627e686a69f5215cb0761e731edb3d9859dd9.zip