summary refs log tree commit diff stats
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- kernel/sched/fair.c 81
1 files changed, 50 insertions, 31 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bc3d6518a06c..7a3c66f1762f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1520,6 +1520,7 @@ struct task_numa_env {
static unsigned long cpu_load(struct rq *rq);
static unsigned long cpu_util(int cpu);
+static inline long adjust_numa_imbalance(int imbalance, int src_nr_running);
static inline enum
numa_type numa_classify(unsigned int imbalance_pct,
@@ -1594,11 +1595,6 @@ static bool load_too_imbalanced(long src_load, long dst_load,
long orig_src_load, orig_dst_load;
long src_capacity, dst_capacity;
-
- /* If dst node has spare capacity, there is no real load imbalance */
- if (env->dst_stats.node_type == node_has_spare)
- return false;
-
/*
* The load is corrected for the CPU capacity available on each node.
*
@@ -1757,19 +1753,42 @@ unlock:
static void task_numa_find_cpu(struct task_numa_env *env,
long taskimp, long groupimp)
{
- long src_load, dst_load, load;
bool maymove = false;
int cpu;
- load = task_h_load(env->p);
- dst_load = env->dst_stats.load + load;
- src_load = env->src_stats.load - load;
-
/*
- * If the improvement from just moving env->p direction is better
- * than swapping tasks around, check if a move is possible.
+ * If dst node has spare capacity, then check if there is an
+ * imbalance that would be overruled by the load balancer.
*/
- maymove = !load_too_imbalanced(src_load, dst_load, env);
+ if (env->dst_stats.node_type == node_has_spare) {
+ unsigned int imbalance;
+ int src_running, dst_running;
+
+ /*
+ * Would movement cause an imbalance? Note that if src has
+ * more running tasks then the imbalance is ignored, as the
+ * move improves the imbalance from the perspective of the
+ * CPU load balancer.
+ */
+ src_running = env->src_stats.nr_running - 1;
+ dst_running = env->dst_stats.nr_running + 1;
+ imbalance = max(0, dst_running - src_running);
+ imbalance = adjust_numa_imbalance(imbalance, src_running);
+
+ /* Use idle CPU if there is no imbalance */
+ if (!imbalance)
+ maymove = true;
+ } else {
+ long src_load, dst_load, load;
+ /*
+ * If the improvement from just moving env->p direction is better
+ * than swapping tasks around, check if a move is possible.
+ */
+ load = task_h_load(env->p);
+ dst_load = env->dst_stats.load + load;
+ src_load = env->src_stats.load - load;
+ maymove = !load_too_imbalanced(src_load, dst_load, env);
+ }
for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
/* Skip this CPU if the source task cannot migrate */
@@ -8694,6 +8713,21 @@ next_group:
}
}
+static inline long adjust_numa_imbalance(int imbalance, int src_nr_running)
+{
+ unsigned int imbalance_min;
+
+ /*
+ * Allow a small imbalance based on a simple pair of communicating
+ * tasks that remain local when the source domain is almost idle.
+ *
+ * Returns 0 (the imbalance is tolerated) when src_nr_running is at
+ * most imbalance_min, otherwise returns @imbalance unchanged.
+ *
+ * NOTE(review): src_nr_running is a signed int compared against an
+ * unsigned int, so a negative value is converted to a large unsigned
+ * number and @imbalance is returned unchanged -- confirm that callers
+ * never pass a negative count.
+ */
+ imbalance_min = 2;
+ if (src_nr_running <= imbalance_min)
+ return 0;
+
+ return imbalance;
+}
+
/**
* calculate_imbalance - Calculate the amount of imbalance present within the
* groups of a given sched_domain during load balance.
@@ -8790,24 +8824,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
}
/* Consider allowing a small imbalance between NUMA groups */
- if (env->sd->flags & SD_NUMA) {
- unsigned int imbalance_min;
-
- /*
- * Compute an allowed imbalance based on a simple
- * pair of communicating tasks that should remain
- * local and ignore them.
- *
- * NOTE: Generally this would have been based on
- * the domain size and this was evaluated. However,
- * the benefit is similar across a range of workloads
- * and machines but scaling by the domain size adds
- * the risk that lower domains have to be rebalanced.
- */
- imbalance_min = 2;
- if (busiest->sum_nr_running <= imbalance_min)
- env->imbalance = 0;
- }
+ if (env->sd->flags & SD_NUMA)
+ env->imbalance = adjust_numa_imbalance(env->imbalance,
+ busiest->sum_nr_running);
return;
}