Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 3
-rw-r--r-- | kernel/bpf/offload.c | 15
-rw-r--r-- | kernel/cpu.c | 14
-rw-r--r-- | kernel/debug/kdb/kdb_io.c | 2
-rw-r--r-- | kernel/events/core.c | 7
-rw-r--r-- | kernel/futex.c | 4
-rw-r--r-- | kernel/irq/matrix.c | 4
-rw-r--r-- | kernel/kallsyms.c | 8
-rw-r--r-- | kernel/locking/lockdep.c | 3
-rw-r--r-- | kernel/module.c | 6
-rw-r--r-- | kernel/printk/printk.c | 3
-rw-r--r-- | kernel/sched/fair.c | 102
-rw-r--r-- | kernel/sched/wait.c | 2
-rw-r--r-- | kernel/trace/blktrace.c | 30
-rw-r--r-- | kernel/trace/bpf_trace.c | 8
-rw-r--r-- | kernel/workqueue.c | 33
16 files changed, 150 insertions, 94 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b9f8686a84cf..86b50aa26ee8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1447,7 +1447,8 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) rcu_read_lock(); prog = rcu_dereference(progs)->progs; for (; *prog; prog++) - cnt++; + if (*prog != &dummy_bpf_prog.prog) + cnt++; rcu_read_unlock(); return cnt; } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 68ec884440b7..8455b89d1bbf 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -1,3 +1,18 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + */ + #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bug.h> diff --git a/kernel/cpu.c b/kernel/cpu.c index 04892a82f6ac..41376c3ac93b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -780,8 +780,8 @@ static int takedown_cpu(unsigned int cpu) BUG_ON(cpu_online(cpu)); /* - * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all - * runnable tasks from the cpu, there's only the idle task left now + * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed + * all runnable tasks from the CPU, there's only the idle task left now * that the migration thread is done doing the stop_machine thing. * * Wait for the stop thread to go away. @@ -1289,11 +1289,6 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown.single = NULL, .cant_stop = true, }, - [CPUHP_AP_SMPCFD_DYING] = { - .name = "smpcfd:dying", - .startup.single = NULL, - .teardown.single = smpcfd_dying_cpu, - }, /* * Handled on controll processor until the plugged processor manages * this itself. @@ -1335,6 +1330,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = NULL, .teardown.single = rcutree_dying_cpu, }, + [CPUHP_AP_SMPCFD_DYING] = { + .name = "smpcfd:dying", + .startup.single = NULL, + .teardown.single = smpcfd_dying_cpu, + }, /* Entry state on starting. Interrupts enabled from here on. 
Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index e74be38245ad..ed5d34925ad0 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -350,7 +350,7 @@ poll_again: } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (kallsyms_symbol_next(p_tmp, i) < 0) + if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) break; kdb_printf("%s ", p_tmp); *(p_tmp + len) = '\0'; diff --git a/kernel/events/core.c b/kernel/events/core.c index 16beab4767e1..4df5b695bf0d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6639,6 +6639,7 @@ static void perf_event_namespaces_output(struct perf_event *event, struct perf_namespaces_event *namespaces_event = data; struct perf_output_handle handle; struct perf_sample_data sample; + u16 header_size = namespaces_event->event_id.header.size; int ret; if (!perf_event_namespaces_match(event)) @@ -6649,7 +6650,7 @@ static void perf_event_namespaces_output(struct perf_event *event, ret = perf_output_begin(&handle, event, namespaces_event->event_id.header.size); if (ret) - return; + goto out; namespaces_event->event_id.pid = perf_event_pid(event, namespaces_event->task); @@ -6661,6 +6662,8 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); +out: + namespaces_event->event_id.header.size = header_size; } static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, @@ -7987,11 +7990,11 @@ static void bpf_overflow_handler(struct perf_event *event, { struct bpf_perf_event_data_kern ctx = { .data = data, - .regs = regs, .event = event, }; int ret = 0; + ctx.regs = perf_arch_bpf_user_pt_regs(regs); preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) goto out; diff --git a/kernel/futex.c b/kernel/futex.c index 76ed5921117a..57d0b3657e16 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1582,8 +1582,8 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { unsigned int op = (encoded_op & 0x70000000) >> 28; unsigned int cmp = (encoded_op & 0x0f000000) >> 24; - int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12); - int cmparg = sign_extend32(encoded_op & 0x00000fff, 12); + int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); + int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); int oldval, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 7df2480005f8..0ba0dd8863a7 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -384,7 +384,9 @@ unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown) { struct cpumap *cm = this_cpu_ptr(m->maps); - return (m->global_available - cpudown) ? cm->available : 0; + if (!cpudown) + return m->global_available; + return m->global_available - cm->available; } /** diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 531ffa984bc2..d5fa4116688a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -614,14 +614,14 @@ static void s_stop(struct seq_file *m, void *p) static int s_show(struct seq_file *m, void *p) { - unsigned long value; + void *value; struct kallsym_iter *iter = m->private; /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; - value = iter->show_value ? iter->value : 0; + value = iter->show_value ? 
(void *)iter->value : NULL; if (iter->module_name[0]) { char type; @@ -632,10 +632,10 @@ static int s_show(struct seq_file *m, void *p) */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); - seq_printf(m, KALLSYM_FMT " %c %s\t[%s]\n", value, + seq_printf(m, "%px %c %s\t[%s]\n", value, type, iter->name, iter->module_name); } else - seq_printf(m, KALLSYM_FMT " %c %s\n", value, + seq_printf(m, "%px %c %s\n", value, iter->type, iter->name); return 0; } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9776da8db180..670d8d7d8087 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4790,7 +4790,8 @@ void lockdep_invariant_state(bool force) * Verify the former, enforce the latter. */ WARN_ON_ONCE(!force && current->lockdep_depth); - invalidate_xhlock(&xhlock(current->xhlock_idx)); + if (current->xhlocks) + invalidate_xhlock(&xhlock(current->xhlock_idx)); } static int cross_lock(struct lockdep_map *lock) diff --git a/kernel/module.c b/kernel/module.c index f0411a271765..dea01ac9cb74 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4157,7 +4157,7 @@ static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; - unsigned long value; + void *value; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) @@ -4173,8 +4173,8 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - value = m->private ? 0 : (unsigned long)mod->core_layout.base; - seq_printf(m, " 0x" KALLSYM_FMT, value); + value = m->private ? NULL : mod->core_layout.base; + seq_printf(m, " 0x%px", value); /* Taints info */ if (mod->taints) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5d81206a572d..b9006617710f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3141,9 +3141,6 @@ void dump_stack_print_info(const char *log_lvl) void show_regs_print_info(const char *log_lvl) { dump_stack_print_info(log_lvl); - - printk("%stask: %p task.stack: %p\n", - log_lvl, current, task_stack_page(current)); } #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4037e19bbca2..2fe3aa853e4d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se, * _IFF_ we look at the pure running and runnable sums. Because they * represent the very same entity, just at different points in the hierarchy. * - * - * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and - * simply copies the running sum over. + * Per the above update_tg_cfs_util() is trivial and simply copies the running + * sum over (but still wrong, because the group entity and group rq do not have + * their PELT windows aligned). * * However, update_tg_cfs_runnable() is more complex. So we have: * @@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se, * And since, like util, the runnable part should be directly transferable, * the following would _appear_ to be the straight forward approach: * - * grq->avg.load_avg = grq->load.weight * grq->avg.running_avg (3) + * grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg (3) * * And per (1) we have: * - * ge->avg.running_avg == grq->avg.running_avg + * ge->avg.runnable_avg == grq->avg.runnable_avg * * Which gives: * @@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se, * to (shortly) return to us. 
This only works by keeping the weights as * integral part of the sum. We therefore cannot decompose as per (3). * - * OK, so what then? + * Another reason this doesn't work is that runnable isn't a 0-sum entity. + * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the + * rq itself is runnable anywhere between 2/3 and 1 depending on how the + * runnable section of these tasks overlap (or not). If they were to perfectly + * align the rq as a whole would be runnable 2/3 of the time. If however we + * always have at least 1 runnable task, the rq as a whole is always runnable. * + * So we'll have to approximate.. :/ * - * Another way to look at things is: + * Given the constraint: * - * grq->avg.load_avg = \Sum se->avg.load_avg + * ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX * - * Therefore, per (2): + * We can construct a rule that adds runnable to a rq by assuming minimal + * overlap. * - * grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg + * On removal, we'll assume each task is equally runnable; which yields: * - * And the very thing we're propagating is a change in that sum (someone - * joined/left). So we can easily know the runnable change, which would be, per - * (2) the already tracked se->load_avg divided by the corresponding - * se->weight. + * grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight * - * Basically (4) but in differential form: + * XXX: only do this for the part of runnable > running ? * - * d(runnable_avg) += se->avg.load_avg / se->load.weight - * (5) - * ge->avg.load_avg += ge->load.weight * d(runnable_avg) */ static inline void @@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq if (!delta) return; + /* + * The relation between sum and avg is: + * + * LOAD_AVG_MAX - 1024 + sa->period_contrib + * + * however, the PELT windows are not aligned between grq and gse. + */ + /* Set new sched_entity's utilization */ se->avg.util_avg = gcfs_rq->avg.util_avg; se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX; @@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq static inline void update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long runnable_sum = gcfs_rq->prop_runnable_sum; - long runnable_load_avg, load_avg; - s64 runnable_load_sum, load_sum; + long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; + unsigned long runnable_load_avg, load_avg; + u64 runnable_load_sum, load_sum = 0; + s64 delta_sum; if (!runnable_sum) return; gcfs_rq->prop_runnable_sum = 0; + if (runnable_sum >= 0) { + /* + * Add runnable; clip at LOAD_AVG_MAX. Reflects that until + * the CPU is saturated running == runnable. + */ + runnable_sum += se->avg.load_sum; + runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX); + } else { + /* + * Estimate the new unweighted runnable_sum of the gcfs_rq by + * assuming all tasks are equally runnable. 
+ */ + if (scale_load_down(gcfs_rq->load.weight)) { + load_sum = div_s64(gcfs_rq->avg.load_sum, + scale_load_down(gcfs_rq->load.weight)); + } + + /* But make sure to not inflate se's runnable */ + runnable_sum = min(se->avg.load_sum, load_sum); + } + + /* + * runnable_sum can't be lower than running_sum + * As running sum is scale with cpu capacity wehreas the runnable sum + * is not we rescale running_sum 1st + */ + running_sum = se->avg.util_sum / + arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); + runnable_sum = max(runnable_sum, running_sum); + load_sum = (s64)se_weight(se) * runnable_sum; load_avg = div_s64(load_sum, LOAD_AVG_MAX); - add_positive(&se->avg.load_sum, runnable_sum); - add_positive(&se->avg.load_avg, load_avg); + delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum; + delta_avg = load_avg - se->avg.load_avg; - add_positive(&cfs_rq->avg.load_avg, load_avg); - add_positive(&cfs_rq->avg.load_sum, load_sum); + se->avg.load_sum = runnable_sum; + se->avg.load_avg = load_avg; + add_positive(&cfs_rq->avg.load_avg, delta_avg); + add_positive(&cfs_rq->avg.load_sum, delta_sum); runnable_load_sum = (s64)se_runnable(se) * runnable_sum; runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX); + delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum; + delta_avg = runnable_load_avg - se->avg.runnable_load_avg; - add_positive(&se->avg.runnable_load_sum, runnable_sum); - add_positive(&se->avg.runnable_load_avg, runnable_load_avg); + se->avg.runnable_load_sum = runnable_sum; + se->avg.runnable_load_avg = runnable_load_avg; if (se->on_rq) { - add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg); - add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum); + add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg); + add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum); } } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 98feab7933c7..929ecb7d6b78 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&wq_head->lock, flags); - __add_wait_queue_entry_tail(wq_head, wq_entry); + __add_wait_queue(wq_head, wq_entry); spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(add_wait_queue); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 206e0e2ace53..987d9a9ae283 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } return 0; @@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } @@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what, int error, union kernfs_node_id *cgid) + u32 what, int error) { struct blk_trace *bt = q->blk_trace; @@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, return; __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid); + bio_op(bio), bio->bi_opf, what, error, 0, NULL, + 
blk_trace_bio_get_cgid(q, bio)); } static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, @@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, @@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { struct blk_trace *bt = q->blk_trace; @@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { struct blk_trace *bt = q->blk_trace; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 27d1f4ffa3de..0ce99c379c30 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -759,6 +759,8 @@ const struct bpf_prog_ops perf_event_prog_ops = { static DEFINE_MUTEX(bpf_event_mutex); +#define BPF_TRACE_MAX_PROGS 64 + int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) { @@ -772,6 +774,12 @@ int perf_event_attach_bpf_prog(struct perf_event *event, goto unlock; old_array = event->tp_event->prog_array; + if (old_array && + bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { + ret = -E2BIG; + goto unlock; + } + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); if (ret < 0) goto unlock; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..43d18cb46308 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -38,7 +38,6 @@ #include <linux/hardirq.h> #include <linux/mempolicy.h> #include <linux/freezer.h> -#include <linux/kallsyms.h> #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> @@ -48,6 +47,7 @@ #include <linux/nodemask.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> +#include <linux/sched/isolation.h> #include "workqueue_internal.h" @@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); /* - * Sanity check nr_running. Because wq_unbind_fn() releases + * Sanity check nr_running. 
Because unbind_workers() releases * pool->lock between setting %WORKER_UNBOUND and zapping * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. @@ -4510,9 +4510,8 @@ void show_workqueue_state(void) * cpu comes back online. */ -static void wq_unbind_fn(struct work_struct *work) +static void unbind_workers(int cpu) { - int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; @@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool) spin_lock_irq(&pool->lock); - /* - * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED - * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is - * being reworked and this can go away in time. - */ - if (!(pool->flags & POOL_DISASSOCIATED)) { - spin_unlock_irq(&pool->lock); - return; - } - pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { @@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu) int workqueue_offline_cpu(unsigned int cpu) { - struct work_struct unbind_work; struct workqueue_struct *wq; /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); + if (WARN_ON(cpu != smp_processor_id())) + return -1; + + unbind_workers(cpu); /* update NUMA affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); @@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu) wq_update_unbound_numa(wq, cpu, false); mutex_unlock(&wq_pool_mutex); - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); return 0; } @@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) return -ENOMEM; + /* + * Not excluding isolated cpus on purpose. + * If the user wishes to include them, we allow that. + */ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); @@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void) WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); |