Diffstat (limited to 'kernel/cgroup')
-rw-r--r--  kernel/cgroup/cgroup-v1.c |   8
-rw-r--r--  kernel/cgroup/cgroup.c    |  27
-rw-r--r--  kernel/cgroup/cpuset.c    | 221
-rw-r--r--  kernel/cgroup/rstat.c     |  19
4 files changed, 183 insertions, 92 deletions
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index de2c432dee20..35b920328344 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -50,6 +50,8 @@ bool cgroup1_ssid_disabled(int ssid)
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
+ *
+ * Return: %0 on success or a negative errno code on failure
 */
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 {
@@ -80,7 +82,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 /**
- * cgroup_trasnsfer_tasks - move tasks from one cgroup to another
+ * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
@@ -89,6 +91,8 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 * is guaranteed to be either visible in the source cgroup after the
 * parent's migration is complete or put into the target cgroup. No task
 * can slip out of migration through forking.
+ *
+ * Return: %0 on success or a negative errno code on failure
 */
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 {
@@ -682,6 +686,8 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
 *
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.
+ *
+ * Return: %0 on success or a negative errno code on failure
 */
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 {
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3a0161c21b6b..881ce1470beb 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -68,6 +68,14 @@
 #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
 /*
+ * To avoid confusing the compiler (and generating warnings) with code
+ * that attempts to access what would be a 0-element array (i.e. sized
+ * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
+ * constant expression can be added.
+ */
+#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
+
+/*
 * cgroup_mutex is the master lock. Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
@@ -248,7 +256,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
 */
 bool cgroup_ssid_enabled(int ssid)
 {
- if (CGROUP_SUBSYS_COUNT == 0)
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
 return false;
 return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
@@ -472,7 +480,7 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp)
 static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 struct cgroup_subsys *ss)
 {
- if (ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && ss)
 return rcu_dereference_check(cgrp->subsys[ss->id],
 lockdep_is_held(&cgroup_mutex));
 else
@@ -550,6 +558,9 @@ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 {
 struct cgroup_subsys_state *css;
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
+ return NULL;
+
 do {
 css = cgroup_css(cgrp, ss);
@@ -577,6 +588,9 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
 {
 struct cgroup_subsys_state *css;
+ if (!CGROUP_HAS_SUBSYS_CONFIG)
+ return NULL;
+
 rcu_read_lock();
 do {
@@ -647,7 +661,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 * the matching css from the cgroup's subsys table is guaranteed to
 * be and stay valid until the enclosing operation is complete.
 */
- if (cft->ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss)
 return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
 else
 return &cgrp->self;
@@ -695,7 +709,7 @@ EXPORT_SYMBOL_GPL(of_css);
 */
 #define do_each_subsys_mask(ss, ssid, ss_mask) do { \
 unsigned long __ss_mask = (ss_mask); \
- if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
+ if (!CGROUP_HAS_SUBSYS_CONFIG) { \
 (ssid) = 0; \
 break; \
 } \
@@ -2169,7 +2183,6 @@ static void cgroup_kill_sb(struct super_block *sb)
 /*
 * If @root doesn't have any children, start killing it.
 * This prevents new mounts by disabling percpu_ref_tryget_live().
- * cgroup_mount() may wait for @root's release.
 *
 * And don't kill the default root.
 */
@@ -2373,7 +2386,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 struct css_set *cset = tset->cur_cset;
 struct task_struct *task = tset->cur_task;
- while (&cset->mg_node != tset->csets) {
+ while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) {
 if (!task)
 task = list_first_entry(&cset->mg_tasks, struct task_struct, cg_list);
@@ -4644,7 +4657,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
 it->ss = css->ss;
 it->flags = flags;
- if (it->ss)
+ if (CGROUP_HAS_SUBSYS_CONFIG && it->ss)
 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
 else
 it->cset_pos = &css->cgroup->cset_links;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index adb5190c4429..df1ccf4558f8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -160,6 +160,9 @@ struct cpuset {
 */
 int use_parent_ecpus;
 int child_ecpus_count;
+
+ /* Handle for cpuset.cpus.partition */
+ struct cgroup_file partition_file;
 };
 /*
@@ -263,6 +266,16 @@ static inline int is_partition_root(const struct cpuset *cs)
 return cs->partition_root_state > 0;
 }
+/*
+ * Send notification event of whenever partition_root_state changes.
+ */
+static inline void notify_partition_change(struct cpuset *cs,
+ int old_prs, int new_prs)
+{
+ if (old_prs != new_prs)
+ cgroup_file_notify(&cs->partition_file);
+}
+
 static struct cpuset top_cpuset = {
 .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
 (1 << CS_MEM_EXCLUSIVE)),
@@ -372,18 +385,29 @@ static inline bool is_in_v2_mode(void)
 }
 /*
- * Return in pmask the portion of a cpusets's cpus_allowed that
- * are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.
+ * Return in pmask the portion of a task's cpusets's cpus_allowed that
+ * are online and are capable of running the task. If none are found,
+ * walk up the cpuset hierarchy until we find one that does have some
+ * appropriate cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
-static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+static void guarantee_online_cpus(struct task_struct *tsk,
+ struct cpumask *pmask)
 {
- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+ const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+ struct cpuset *cs;
+
+ if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
+ cpumask_copy(pmask, cpu_online_mask);
+
+ rcu_read_lock();
+ cs = task_cs(tsk);
+
+ while (!cpumask_intersects(cs->effective_cpus, pmask)) {
 cs = parent_cs(cs);
 if (unlikely(!cs)) {
 /*
@@ -393,11 +417,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 * cpuset's effective_cpus is on its way to be
 * identical to cpu_online_mask.
 */
- cpumask_copy(pmask, cpu_online_mask);
- return;
+ goto out_unlock;
 }
 }
- cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+ cpumask_and(pmask, pmask, cs->effective_cpus);
+
+out_unlock:
+ rcu_read_unlock();
 }
 /*
@@ -979,7 +1005,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 * 'cpus' is removed, then call this routine to rebuild the
 * scheduler's dynamic sched domains.
 *
- * Call with cpuset_mutex held. Takes get_online_cpus().
+ * Call with cpuset_mutex held. Takes cpus_read_lock().
 */
 static void rebuild_sched_domains_locked(void)
 {
@@ -1040,11 +1066,11 @@ static void rebuild_sched_domains_locked(void)
 void rebuild_sched_domains(void)
 {
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 rebuild_sched_domains_locked();
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 }
 /**
@@ -1114,7 +1140,7 @@ enum subparts_cmd {
 * cpus_allowed can be granted or an error code will be returned.
 *
 * For partcmd_disable, the cpuset is being transofrmed from a partition
- root back to a non-partition root. any CPUs in cpus_allowed that are in
+ root back to a non-partition root. Any CPUs in cpus_allowed that are in
 * parent's subparts_cpus will be taken away from that cpumask and put back
 * into parent's effective_cpus. 0 should always be returned.
 *
@@ -1148,6 +1174,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 struct cpuset *parent = parent_cs(cpuset);
 int adding; /* Moving cpus from effective_cpus to subparts_cpus */
 int deleting; /* Moving cpus from subparts_cpus to effective_cpus */
+ int old_prs, new_prs;
 bool part_error = false; /* Partition error? */
 percpu_rwsem_assert_held(&cpuset_rwsem);
@@ -1183,6 +1210,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 * A cpumask update cannot make parent's effective_cpus become empty.
 */
 adding = deleting = false;
+ old_prs = new_prs = cpuset->partition_root_state;
 if (cmd == partcmd_enable) {
 cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
 adding = true;
@@ -1225,7 +1253,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 /*
 * partcmd_update w/o newmask:
 *
- * addmask = cpus_allowed & parent->effectiveb_cpus
+ * addmask = cpus_allowed & parent->effective_cpus
 *
 * Note that parent's subparts_cpus may have been
 * pre-shrunk in case there is a change in the cpu list.
@@ -1247,11 +1275,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 switch (cpuset->partition_root_state) {
 case PRS_ENABLED:
 if (part_error)
- cpuset->partition_root_state = PRS_ERROR;
+ new_prs = PRS_ERROR;
 break;
 case PRS_ERROR:
 if (!part_error)
- cpuset->partition_root_state = PRS_ENABLED;
+ new_prs = PRS_ENABLED;
 break;
 }
 /*
@@ -1260,10 +1288,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 part_error = (prev_prs == PRS_ERROR);
 }
- if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
+ if (!part_error && (new_prs == PRS_ERROR))
 return 0; /* Nothing need to be done */
- if (cpuset->partition_root_state == PRS_ERROR) {
+ if (new_prs == PRS_ERROR) {
 /*
 * Remove all its cpus from parent's subparts_cpus.
 */
@@ -1272,7 +1300,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 parent->subparts_cpus);
 }
- if (!adding && !deleting)
+ if (!adding && !deleting && (new_prs == old_prs))
 return 0;
 /*
@@ -1299,7 +1327,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 }
 parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+
+ if (old_prs != new_prs)
+ cpuset->partition_root_state = new_prs;
+
 spin_unlock_irq(&callback_lock);
+ notify_partition_change(cpuset, old_prs, new_prs);
 return cmd == partcmd_update;
 }
@@ -1321,6 +1354,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 struct cpuset *cp;
 struct cgroup_subsys_state *pos_css;
 bool need_rebuild_sched_domains = false;
+ int old_prs, new_prs;
 rcu_read_lock();
 cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@@ -1360,17 +1394,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 * update_tasks_cpumask() again for tasks in the parent
 * cpuset if the parent's subparts_cpus changes.
 */
- if ((cp != cs) && cp->partition_root_state) {
+ old_prs = new_prs = cp->partition_root_state;
+ if ((cp != cs) && old_prs) {
 switch (parent->partition_root_state) {
 case PRS_DISABLED:
 /*
 * If parent is not a partition root or an
- * invalid partition root, clear the state
- * state and the CS_CPU_EXCLUSIVE flag.
+ * invalid partition root, clear its state
+ * and its CS_CPU_EXCLUSIVE flag.
 */
 WARN_ON_ONCE(cp->partition_root_state != PRS_ERROR);
- cp->partition_root_state = 0;
+ new_prs = PRS_DISABLED;
 /*
 * clear_bit() is an atomic operation and
@@ -1391,11 +1426,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 /*
 * When parent is invalid, it has to be too.
 */
- cp->partition_root_state = PRS_ERROR;
- if (cp->nr_subparts_cpus) {
- cp->nr_subparts_cpus = 0;
- cpumask_clear(cp->subparts_cpus);
- }
+ new_prs = PRS_ERROR;
 break;
 }
 }
@@ -1407,8 +1438,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 spin_lock_irq(&callback_lock);
 cpumask_copy(cp->effective_cpus, tmp->new_cpus);
- if (cp->nr_subparts_cpus &&
- (cp->partition_root_state != PRS_ENABLED)) {
+ if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
 cp->nr_subparts_cpus = 0;
 cpumask_clear(cp->subparts_cpus);
 } else if (cp->nr_subparts_cpus) {
@@ -1435,7 +1465,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 = cpumask_weight(cp->subparts_cpus);
 }
 }
+
+ if (new_prs != old_prs)
+ cp->partition_root_state = new_prs;
+
 spin_unlock_irq(&callback_lock);
+ notify_partition_change(cp, old_prs, new_prs);
 WARN_ON(!is_in_v2_mode() &&
 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -1612,6 +1647,11 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 {
 struct cpuset_migrate_mm_work *mwork;
+ if (nodes_equal(*from, *to)) {
+ mmput(mm);
+ return;
+ }
+
 mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
 if (mwork) {
 mwork->mm = mm;
@@ -1937,34 +1977,32 @@ out:
 /*
 * update_prstate - update partititon_root_state
- * cs: the cpuset to update
- * val: 0 - disabled, 1 - enabled
+ * cs: the cpuset to update
+ * new_prs: new partition root state
 *
 * Call with cpuset_mutex held.
 */
-static int update_prstate(struct cpuset *cs, int val)
+static int update_prstate(struct cpuset *cs, int new_prs)
 {
- int err;
+ int err, old_prs = cs->partition_root_state;
 struct cpuset *parent = parent_cs(cs);
- struct tmpmasks tmp;
+ struct tmpmasks tmpmask;
- if ((val != 0) && (val != 1))
- return -EINVAL;
- if (val == cs->partition_root_state)
+ if (old_prs == new_prs)
 return 0;
 /*
 * Cannot force a partial or invalid partition root to a full
 * partition root.
 */
- if (val && cs->partition_root_state)
+ if (new_prs && (old_prs == PRS_ERROR))
 return -EINVAL;
- if (alloc_cpumasks(NULL, &tmp))
+ if (alloc_cpumasks(NULL, &tmpmask))
 return -ENOMEM;
 err = -EINVAL;
- if (!cs->partition_root_state) {
+ if (!old_prs) {
 /*
 * Turning on partition root requires setting the
 * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@@ -1978,31 +2016,27 @@ static int update_prstate(struct cpuset *cs, int val)
 goto out;
 err = update_parent_subparts_cpumask(cs, partcmd_enable,
- NULL, &tmp);
+ NULL, &tmpmask);
 if (err) {
 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 goto out;
 }
- cs->partition_root_state = PRS_ENABLED;
 } else {
 /*
 * Turning off partition root will clear the
 * CS_CPU_EXCLUSIVE bit.
 */
- if (cs->partition_root_state == PRS_ERROR) {
- cs->partition_root_state = 0;
+ if (old_prs == PRS_ERROR) {
 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 err = 0;
 goto out;
 }
 err = update_parent_subparts_cpumask(cs, partcmd_disable,
- NULL, &tmp);
+ NULL, &tmpmask);
 if (err)
 goto out;
- cs->partition_root_state = 0;
-
 /* Turning off CS_CPU_EXCLUSIVE will not return error */
 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 }
@@ -2015,11 +2049,18 @@ static int update_prstate(struct cpuset *cs, int val)
 update_tasks_cpumask(parent);
 if (parent->child_ecpus_count)
- update_sibling_cpumasks(parent, cs, &tmp);
+ update_sibling_cpumasks(parent, cs, &tmpmask);
 rebuild_sched_domains_locked();
 out:
- free_cpumasks(NULL, &tmp);
+ if (!err) {
+ spin_lock_irq(&callback_lock);
+ cs->partition_root_state = new_prs;
+ spin_unlock_irq(&callback_lock);
+ notify_partition_change(cs, old_prs, new_prs);
+ }
+
+ free_cpumasks(NULL, &tmpmask);
 return err;
 }
@@ -2199,15 +2240,13 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 percpu_down_write(&cpuset_rwsem);
- /* prepare for attach */
- if (cs == &top_cpuset)
- cpumask_copy(cpus_attach, cpu_possible_mask);
- else
- guarantee_online_cpus(cs, cpus_attach);
-
 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 cgroup_taskset_for_each(task, css, tset) {
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
 /*
 * can_attach beforehand should guarantee that this doesn't
 * fail. TODO: have a better way to handle failure here
@@ -2282,7 +2321,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 cpuset_filetype_t type = cft->private;
 int retval = 0;
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 if (!is_cpuset_online(cs)) {
 retval = -ENODEV;
@@ -2320,7 +2359,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 }
 out_unlock:
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 return retval;
 }
@@ -2331,7 +2370,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 cpuset_filetype_t type = cft->private;
 int retval = -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 if (!is_cpuset_online(cs))
 goto out_unlock;
@@ -2346,7 +2385,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 }
 out_unlock:
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 return retval;
 }
@@ -2385,7 +2424,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 kernfs_break_active_protection(of->kn);
 flush_work(&cpuset_hotplug_work);
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 if (!is_cpuset_online(cs))
 goto out_unlock;
@@ -2411,7 +2450,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 free_cpuset(trialcs);
 out_unlock:
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 kernfs_unbreak_active_protection(of->kn);
 css_put(&cs->css);
 flush_workqueue(cpuset_migrate_mm_wq);
@@ -2542,7 +2581,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 return -EINVAL;
 css_get(&cs->css);
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 if (!is_cpuset_online(cs))
 goto out_unlock;
@@ -2550,7 +2589,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 retval = update_prstate(cs, val);
 out_unlock:
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 css_put(&cs->css);
 return retval ?: nbytes;
 }
@@ -2702,6 +2741,7 @@ static struct cftype dfl_files[] = {
 .write = sched_partition_write,
 .private = FILE_PARTITION_ROOT,
 .flags = CFTYPE_NOT_ON_ROOT,
+ .file_offset = offsetof(struct cpuset, partition_file),
 },
 {
@@ -2737,12 +2777,16 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
 return ERR_PTR(-ENOMEM);
 }
- set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+ __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 nodes_clear(cs->mems_allowed);
 nodes_clear(cs->effective_mems);
 fmeter_init(&cs->fmeter);
 cs->relax_domain_level = -1;
+ /* Set CS_MEMORY_MIGRATE for default hierarchy */
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+ __set_bit(CS_MEMORY_MIGRATE, &cs->flags);
+
 return &cs->css;
 }
@@ -2756,7 +2800,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 if (!parent)
 return 0;
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 set_bit(CS_ONLINE, &cs->flags);
@@ -2809,7 +2853,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 spin_unlock_irq(&callback_lock);
 out_unlock:
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 return 0;
 }
@@ -2828,7 +2872,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
 struct cpuset *cs = css_cs(css);
- get_online_cpus();
+ cpus_read_lock();
 percpu_down_write(&cpuset_rwsem);
 if (is_partition_root(cs))
@@ -2849,7 +2893,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 clear_bit(CS_ONLINE, &cs->flags);
 percpu_up_write(&cpuset_rwsem);
- put_online_cpus();
+ cpus_read_unlock();
 }
 static void cpuset_css_free(struct cgroup_subsys_state *css)
@@ -3060,7 +3104,7 @@ retry:
 goto retry;
 }
- parent = parent_cs(cs);
+ parent = parent_cs(cs);
 compute_effective_cpumask(&new_cpus, cs, parent);
 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
@@ -3082,8 +3126,10 @@ retry:
 if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
 (parent->partition_root_state == PRS_ERROR))) {
 if (cs->nr_subparts_cpus) {
+ spin_lock_irq(&callback_lock);
 cs->nr_subparts_cpus = 0;
 cpumask_clear(cs->subparts_cpus);
+ spin_unlock_irq(&callback_lock);
 compute_effective_cpumask(&new_cpus, cs, parent);
 }
@@ -3095,9 +3141,17 @@ retry:
 */
 if ((parent->partition_root_state == PRS_ERROR) ||
 cpumask_empty(&new_cpus)) {
+ int old_prs;
+
 update_parent_subparts_cpumask(cs, partcmd_disable,
 NULL, tmp);
- cs->partition_root_state = PRS_ERROR;
+ old_prs = cs->partition_root_state;
+ if (old_prs != PRS_ERROR) {
+ spin_lock_irq(&callback_lock);
+ cs->partition_root_state = PRS_ERROR;
+ spin_unlock_irq(&callback_lock);
+ notify_partition_change(cs, old_prs, PRS_ERROR);
+ }
 }
 cpuset_force_rebuild();
 }
@@ -3168,6 +3222,13 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
 mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
+ /*
+ * In the rare case that hotplug removes all the cpus in subparts_cpus,
+ * we assumed that cpus are updated.
+ */
+ if (!cpus_updated && top_cpuset.nr_subparts_cpus)
+ cpus_updated = true;
+
 /* synchronize cpus_allowed to cpu_active_mask */
 if (cpus_updated) {
 spin_lock_irq(&callback_lock);
@@ -3302,9 +3363,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 unsigned long flags;
 spin_lock_irqsave(&callback_lock, flags);
- rcu_read_lock();
- guarantee_online_cpus(task_cs(tsk), pmask);
- rcu_read_unlock();
+ guarantee_online_cpus(tsk, pmask);
 spin_unlock_irqrestore(&callback_lock, flags);
 }
@@ -3318,13 +3377,22 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 * which will not contain a sane cpumask during cases such as cpu hotplugging.
 * This is the absolute last resort for the scheduler and it is only used if
 * _every_ other avenue has been traveled.
+ *
+ * Returns true if the affinity of @tsk was changed, false otherwise.
 **/
-void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
+ const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+ const struct cpumask *cs_mask;
+ bool changed = false;
+
 rcu_read_lock();
- do_set_cpus_allowed(tsk, is_in_v2_mode() ?
- task_cs(tsk)->cpus_allowed : cpu_possible_mask);
+ cs_mask = task_cs(tsk)->cpus_allowed;
+ if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
+ do_set_cpus_allowed(tsk, cs_mask);
+ changed = true;
+ }
 rcu_read_unlock();
 /*
@@ -3344,6 +3412,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 * select_fallback_rq() will fix things ups and set cpu_possible_mask
 * if required.
 */
+ return changed;
 }
 void __init cpuset_init_current_mems_allowed(void)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 7f0e58917432..b264ab5652ba 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -347,19 +347,20 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
 }
 static struct cgroup_rstat_cpu *
-cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
+cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
 {
 struct cgroup_rstat_cpu *rstatc;
 rstatc = get_cpu_ptr(cgrp->rstat_cpu);
- u64_stats_update_begin(&rstatc->bsync);
+ *flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
 return rstatc;
 }
 static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
- struct cgroup_rstat_cpu *rstatc)
+ struct cgroup_rstat_cpu *rstatc,
+ unsigned long flags)
 {
- u64_stats_update_end(&rstatc->bsync);
+ u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
 cgroup_rstat_updated(cgrp, smp_processor_id());
 put_cpu_ptr(rstatc);
 }
@@ -367,18 +368,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
 void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
 {
 struct cgroup_rstat_cpu *rstatc;
+ unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+ rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
 rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
- cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+ cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
 }
 void __cgroup_account_cputime_field(struct cgroup *cgrp,
 enum cpu_usage_stat index, u64 delta_exec)
 {
 struct cgroup_rstat_cpu *rstatc;
+ unsigned long flags;
- rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+ rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
 switch (index) {
 case CPUTIME_USER:
@@ -394,7 +397,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
 break;
 }
- cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+ cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
 }
 /*
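The rstat.c hunks above convert the per-cpu base-stat accounting from u64_stats_update_begin()/end() to the _irqsave()/_irqrestore() variants and thread the saved flags through the begin/end helpers, so the seqcount write section cannot be re-entered by an interrupt that updates the same per-cpu counters. Below is a minimal sketch of that pattern using the u64_stats and per-cpu APIs visible in the diff; the structure and counter names (my_pcpu_stat, nr_bytes, my_stat_add) are made up for illustration and are not kernel code.

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

/* Hypothetical per-cpu statistics object, not the kernel's cgroup_rstat_cpu. */
struct my_pcpu_stat {
        struct u64_stats_sync sync;
        u64 nr_bytes;
};

static void my_stat_add(struct my_pcpu_stat __percpu *stat, u64 delta)
{
        struct my_pcpu_stat *s = get_cpu_ptr(stat);    /* pin the current CPU */
        unsigned long flags;

        /* Open the write section with interrupts off, as the patch does. */
        flags = u64_stats_update_begin_irqsave(&s->sync);
        s->nr_bytes += delta;
        u64_stats_update_end_irqrestore(&s->sync, flags);

        put_cpu_ptr(stat);
}

Whether the saved flags actually disable interrupts depends on the configuration (BITS_PER_LONG, PREEMPT_RT); the point of the pattern is that an IRQ arriving on the same CPU can no longer nest inside an open write section and confuse readers of the 64-bit counters.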
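Further up, the cgroup.c hunks replace open-coded CGROUP_SUBSYS_COUNT tests with the new CGROUP_HAS_SUBSYS_CONFIG constant so that indexing expressions such as cgrp->subsys[ss->id] sit behind a compile-time-false condition when no subsystems are configured. A standalone sketch of that guard idiom follows; the names (NSUBSYS, enabled[], subsys_enabled()) are placeholders standing in for the cgroup symbols, not the kernel code itself.

/* NSUBSYS plays the role of CGROUP_SUBSYS_COUNT and may legitimately be 0. */
#define NSUBSYS 0
#define HAS_SUBSYS_CONFIG (NSUBSYS > 0)

static int enabled[NSUBSYS];    /* zero-length array when NSUBSYS == 0 (GNU C) */

static int subsys_enabled(int id)
{
        /*
         * Because HAS_SUBSYS_CONFIG is a constant expression, the array
         * access below becomes dead code when NSUBSYS == 0, so the
         * compiler neither warns about nor emits an out-of-bounds read
         * of the empty array.
         */
        if (!HAS_SUBSYS_CONFIG)
                return 0;
        return enabled[id];
}

This mirrors the shape of the cgroup_ssid_enabled() change in the diff; flipping NSUBSYS between 0 and a positive value exercises both sides of the guard.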