summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c14
-rw-r--r--kernel/debug/kdb/kdb_io.c2
-rw-r--r--kernel/events/core.c7
-rw-r--r--kernel/irq/matrix.c4
-rw-r--r--kernel/locking/lockdep.c3
-rw-r--r--kernel/printk/printk.c3
-rw-r--r--kernel/sched/fair.c102
-rw-r--r--kernel/sched/wait.c2
8 files changed, 92 insertions, 45 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 04892a82f6ac..41376c3ac93b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -780,8 +780,8 @@ static int takedown_cpu(unsigned int cpu)
BUG_ON(cpu_online(cpu));
/*
- * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
- * runnable tasks from the cpu, there's only the idle task left now
+ * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
+ * all runnable tasks from the CPU, there's only the idle task left now
* that the migration thread is done doing the stop_machine thing.
*
* Wait for the stop thread to go away.
@@ -1289,11 +1289,6 @@ static struct cpuhp_step cpuhp_bp_states[] = {
.teardown.single = NULL,
.cant_stop = true,
},
- [CPUHP_AP_SMPCFD_DYING] = {
- .name = "smpcfd:dying",
- .startup.single = NULL,
- .teardown.single = smpcfd_dying_cpu,
- },
/*
* Handled on controll processor until the plugged processor manages
* this itself.
@@ -1335,6 +1330,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.startup.single = NULL,
.teardown.single = rcutree_dying_cpu,
},
+ [CPUHP_AP_SMPCFD_DYING] = {
+ .name = "smpcfd:dying",
+ .startup.single = NULL,
+ .teardown.single = smpcfd_dying_cpu,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index e74be38245ad..ed5d34925ad0 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -350,7 +350,7 @@ poll_again:
}
kdb_printf("\n");
for (i = 0; i < count; i++) {
- if (kallsyms_symbol_next(p_tmp, i) < 0)
+ if (WARN_ON(!kallsyms_symbol_next(p_tmp, i)))
break;
kdb_printf("%s ", p_tmp);
*(p_tmp + len) = '\0';
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 16beab4767e1..4df5b695bf0d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6639,6 +6639,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
+ u16 header_size = namespaces_event->event_id.header.size;
int ret;
if (!perf_event_namespaces_match(event))
@@ -6649,7 +6650,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
- return;
+ goto out;
namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
@@ -6661,6 +6662,8 @@ static void perf_event_namespaces_output(struct perf_event *event,
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
+out:
+ namespaces_event->event_id.header.size = header_size;
}
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
@@ -7987,11 +7990,11 @@ static void bpf_overflow_handler(struct perf_event *event,
{
struct bpf_perf_event_data_kern ctx = {
.data = data,
- .regs = regs,
.event = event,
};
int ret = 0;
+ ctx.regs = perf_arch_bpf_user_pt_regs(regs);
preempt_disable();
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
goto out;
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 7df2480005f8..0ba0dd8863a7 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -384,7 +384,9 @@ unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown)
{
struct cpumap *cm = this_cpu_ptr(m->maps);
- return (m->global_available - cpudown) ? cm->available : 0;
+ if (!cpudown)
+ return m->global_available;
+ return m->global_available - cm->available;
}
/**
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 9776da8db180..670d8d7d8087 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4790,7 +4790,8 @@ void lockdep_invariant_state(bool force)
* Verify the former, enforce the latter.
*/
WARN_ON_ONCE(!force && current->lockdep_depth);
- invalidate_xhlock(&xhlock(current->xhlock_idx));
+ if (current->xhlocks)
+ invalidate_xhlock(&xhlock(current->xhlock_idx));
}
static int cross_lock(struct lockdep_map *lock)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5d81206a572d..b9006617710f 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3141,9 +3141,6 @@ void dump_stack_print_info(const char *log_lvl)
void show_regs_print_info(const char *log_lvl)
{
dump_stack_print_info(log_lvl);
-
- printk("%stask: %p task.stack: %p\n",
- log_lvl, current, task_stack_page(current));
}
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4037e19bbca2..2fe3aa853e4d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se,
* _IFF_ we look at the pure running and runnable sums. Because they
* represent the very same entity, just at different points in the hierarchy.
*
- *
- * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
- * simply copies the running sum over.
+ * Per the above update_tg_cfs_util() is trivial and simply copies the running
+ * sum over (but still wrong, because the group entity and group rq do not have
+ * their PELT windows aligned).
*
* However, update_tg_cfs_runnable() is more complex. So we have:
*
@@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se,
* And since, like util, the runnable part should be directly transferable,
* the following would _appear_ to be the straight forward approach:
*
- * grq->avg.load_avg = grq->load.weight * grq->avg.running_avg (3)
+ * grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg (3)
*
* And per (1) we have:
*
- * ge->avg.running_avg == grq->avg.running_avg
+ * ge->avg.runnable_avg == grq->avg.runnable_avg
*
* Which gives:
*
@@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se,
* to (shortly) return to us. This only works by keeping the weights as
* integral part of the sum. We therefore cannot decompose as per (3).
*
- * OK, so what then?
+ * Another reason this doesn't work is that runnable isn't a 0-sum entity.
+ * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the
+ * rq itself is runnable anywhere between 2/3 and 1 depending on how the
+ * runnable section of these tasks overlap (or not). If they were to perfectly
+ * align the rq as a whole would be runnable 2/3 of the time. If however we
+ * always have at least 1 runnable task, the rq as a whole is always runnable.
*
+ * So we'll have to approximate.. :/
*
- * Another way to look at things is:
+ * Given the constraint:
*
- * grq->avg.load_avg = \Sum se->avg.load_avg
+ * ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
*
- * Therefore, per (2):
+ * We can construct a rule that adds runnable to a rq by assuming minimal
+ * overlap.
*
- * grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ * On removal, we'll assume each task is equally runnable; which yields:
*
- * And the very thing we're propagating is a change in that sum (someone
- * joined/left). So we can easily know the runnable change, which would be, per
- * (2) the already tracked se->load_avg divided by the corresponding
- * se->weight.
+ * grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
*
- * Basically (4) but in differential form:
+ * XXX: only do this for the part of runnable > running ?
*
- * d(runnable_avg) += se->avg.load_avg / se->load.weight
- * (5)
- * ge->avg.load_avg += ge->load.weight * d(runnable_avg)
*/
static inline void
@@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
if (!delta)
return;
+ /*
+ * The relation between sum and avg is:
+ *
+ * LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * however, the PELT windows are not aligned between grq and gse.
+ */
+
/* Set new sched_entity's utilization */
se->avg.util_avg = gcfs_rq->avg.util_avg;
se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
@@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long runnable_sum = gcfs_rq->prop_runnable_sum;
- long runnable_load_avg, load_avg;
- s64 runnable_load_sum, load_sum;
+ long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+ unsigned long runnable_load_avg, load_avg;
+ u64 runnable_load_sum, load_sum = 0;
+ s64 delta_sum;
if (!runnable_sum)
return;
gcfs_rq->prop_runnable_sum = 0;
+ if (runnable_sum >= 0) {
+ /*
+ * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
+ * the CPU is saturated running == runnable.
+ */
+ runnable_sum += se->avg.load_sum;
+ runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+ } else {
+ /*
+ * Estimate the new unweighted runnable_sum of the gcfs_rq by
+ * assuming all tasks are equally runnable.
+ */
+ if (scale_load_down(gcfs_rq->load.weight)) {
+ load_sum = div_s64(gcfs_rq->avg.load_sum,
+ scale_load_down(gcfs_rq->load.weight));
+ }
+
+ /* But make sure to not inflate se's runnable */
+ runnable_sum = min(se->avg.load_sum, load_sum);
+ }
+
+ /*
+ * runnable_sum can't be lower than running_sum
+ * As running sum is scale with cpu capacity wehreas the runnable sum
+ * is not we rescale running_sum 1st
+ */
+ running_sum = se->avg.util_sum /
+ arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+ runnable_sum = max(runnable_sum, running_sum);
+
load_sum = (s64)se_weight(se) * runnable_sum;
load_avg = div_s64(load_sum, LOAD_AVG_MAX);
- add_positive(&se->avg.load_sum, runnable_sum);
- add_positive(&se->avg.load_avg, load_avg);
+ delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
+ delta_avg = load_avg - se->avg.load_avg;
- add_positive(&cfs_rq->avg.load_avg, load_avg);
- add_positive(&cfs_rq->avg.load_sum, load_sum);
+ se->avg.load_sum = runnable_sum;
+ se->avg.load_avg = load_avg;
+ add_positive(&cfs_rq->avg.load_avg, delta_avg);
+ add_positive(&cfs_rq->avg.load_sum, delta_sum);
runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
+ delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
+ delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
- add_positive(&se->avg.runnable_load_sum, runnable_sum);
- add_positive(&se->avg.runnable_load_avg, runnable_load_avg);
+ se->avg.runnable_load_sum = runnable_sum;
+ se->avg.runnable_load_avg = runnable_load_avg;
if (se->on_rq) {
- add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg);
- add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum);
+ add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
+ add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
}
}
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 98feab7933c7..929ecb7d6b78 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
- __add_wait_queue_entry_tail(wq_head, wq_entry);
+ __add_wait_queue(wq_head, wq_entry);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);