From 661ee6280931548f7b3b887ad26a157474ae5ac4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 8 May 2021 14:15:38 +0200 Subject: cgroup: introduce cgroup.kill Introduce the cgroup.kill file. It does what it says on the tin and allows a caller to kill a cgroup by writing "1" into cgroup.kill. The file is available in non-root cgroups. Killing cgroups is a process directed operation, i.e. the whole thread-group is affected. Consequently trying to write to cgroup.kill in threaded cgroups will be rejected and EOPNOTSUPP returned. This behavior aligns with cgroup.procs where reads in threaded-cgroups are rejected with EOPNOTSUPP. The cgroup.kill file is write-only since killing a cgroup is an event not which makes it different from e.g. freezer where a cgroup transitions between the two states. As with all new cgroup features cgroup.kill is recursive by default. Killing a cgroup is protected against concurrent migrations through the cgroup mutex. To protect against forkbombs and to mitigate the effect of racing forks a new CGRP_KILL css set lock protected flag is introduced that is set prior to killing a cgroup and unset after the cgroup has been killed. We can then check in cgroup_post_fork() where we hold the css set lock already whether the cgroup is currently being killed. If so we send the child a SIGKILL signal immediately taking it down as soon as it returns to userspace. To make the killing of the child semantically clean it is killed after all cgroup attachment operations have been finalized. There are various use-cases of this interface: - Containers usually have a conservative layout where each container usually has a delegated cgroup. For such layouts there is a 1:1 mapping between container and cgroup. If the container in addition uses a separate pid namespace then killing a container usually becomes a simple kill -9 from an ancestor pid namespace. However, there are quite a few scenarios where that isn't true. For example, there are containers that share the cgroup with other processes on purpose that are supposed to be bound to the lifetime of the container but are not in the same pidns of the container. Containers that are in a delegated cgroup but share the pid namespace with the host or other containers. - Service managers such as systemd use cgroups to group and organize processes belonging to a service. They usually rely on a recursive algorithm now to kill a service. With cgroup.kill this becomes a simple write to cgroup.kill. - Userspace OOM implementations can make good use of this feature to efficiently take down whole cgroups quickly. - The kill program can gain a new kill --cgroup /sys/fs/cgroup/delegated flag to take down cgroups. A few observations about the semantics: - If parent and child are in the same cgroup and CLONE_INTO_CGROUP is not specified we are not taking cgroup mutex meaning the cgroup can be killed while a process in that cgroup is forking. If the kill request happens right before cgroup_can_fork() and before the parent grabs its siglock the parent is guaranteed to see the pending SIGKILL. In addition we perform another check in cgroup_post_fork() whether the cgroup is being killed and is so take down the child (see above). This is robust enough and protects gainst forkbombs. If userspace really really wants to have stricter protection the simple solution would be to grab the write side of the cgroup threadgroup rwsem which will force all ongoing forks to complete before killing starts. We concluded that this is not necessary as the semantics for concurrent forking should simply align with freezer where a similar check as cgroup_post_fork() is performed. For all other cases CLONE_INTO_CGROUP is required. In this case we will grab the cgroup mutex so the cgroup can't be killed while we fork. Once we're done with the fork and have dropped cgroup mutex we are visible and will be found by any subsequent kill request. - We obviously don't kill kthreads. This means a cgroup that has a kthread will not become empty after killing and consequently no unpopulated event will be generated. The assumption is that kthreads should be in the root cgroup only anyway so this is not an issue. - We skip killing tasks that already have pending fatal signals. - Freezer doesn't care about tasks in different pid namespaces, i.e. if you have two tasks in different pid namespaces the cgroup would still be frozen. The cgroup.kill mechanism consequently behaves the same way, i.e. we kill all processes and ignore in which pid namespace they exist. - If the caller is located in a cgroup that is killed the caller will obviously be killed as well. Link: https://lore.kernel.org/r/20210503143922.3093755-1-brauner@kernel.org Cc: Shakeel Butt Cc: Roman Gushchin Cc: Tejun Heo Cc: cgroups@vger.kernel.org Reviewed-by: Shakeel Butt Reviewed-by: Serge Hallyn Acked-by: Roman Gushchin Signed-off-by: Christian Brauner Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 127 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 14 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e049edd66776..e640fc78d731 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3667,6 +3667,80 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, return nbytes; } +static void __cgroup_kill(struct cgroup *cgrp) +{ + struct css_task_iter it; + struct task_struct *task; + + lockdep_assert_held(&cgroup_mutex); + + spin_lock_irq(&css_set_lock); + set_bit(CGRP_KILL, &cgrp->flags); + spin_unlock_irq(&css_set_lock); + + css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it); + while ((task = css_task_iter_next(&it))) { + /* Ignore kernel threads here. */ + if (task->flags & PF_KTHREAD) + continue; + + /* Skip tasks that are already dying. */ + if (__fatal_signal_pending(task)) + continue; + + send_sig(SIGKILL, task, 0); + } + css_task_iter_end(&it); + + spin_lock_irq(&css_set_lock); + clear_bit(CGRP_KILL, &cgrp->flags); + spin_unlock_irq(&css_set_lock); +} + +static void cgroup_kill(struct cgroup *cgrp) +{ + struct cgroup_subsys_state *css; + struct cgroup *dsct; + + lockdep_assert_held(&cgroup_mutex); + + cgroup_for_each_live_descendant_pre(dsct, css, cgrp) + __cgroup_kill(dsct); +} + +static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + ssize_t ret = 0; + int kill; + struct cgroup *cgrp; + + ret = kstrtoint(strstrip(buf), 0, &kill); + if (ret) + return ret; + + if (kill != 1) + return -ERANGE; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENOENT; + + /* + * Killing is a process directed operation, i.e. the whole thread-group + * is taken down so act like we do for cgroup.procs and only make this + * writable in non-threaded cgroups. + */ + if (cgroup_is_threaded(cgrp)) + ret = -EOPNOTSUPP; + else + cgroup_kill(cgrp); + + cgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); @@ -4859,6 +4933,11 @@ static struct cftype cgroup_base_files[] = { .seq_show = cgroup_freeze_show, .write = cgroup_freeze_write, }, + { + .name = "cgroup.kill", + .flags = CFTYPE_NOT_ON_ROOT, + .write = cgroup_kill_write, + }, { .name = "cpu.stat", .seq_show = cpu_stat_show, @@ -6085,6 +6164,8 @@ void cgroup_post_fork(struct task_struct *child, struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { + unsigned long cgrp_flags = 0; + bool kill = false; struct cgroup_subsys *ss; struct css_set *cset; int i; @@ -6096,6 +6177,11 @@ void cgroup_post_fork(struct task_struct *child, /* init tasks are special, only link regular threads */ if (likely(child->pid)) { + if (kargs->cgrp) + cgrp_flags = kargs->cgrp->flags; + else + cgrp_flags = cset->dfl_cgrp->flags; + WARN_ON_ONCE(!list_empty(&child->cg_list)); cset->nr_tasks++; css_set_move_task(child, NULL, cset, false); @@ -6104,23 +6190,32 @@ void cgroup_post_fork(struct task_struct *child, cset = NULL; } - /* - * If the cgroup has to be frozen, the new task has too. Let's set - * the JOBCTL_TRAP_FREEZE jobctl bit to get the task into the - * frozen state. - */ - if (unlikely(cgroup_task_freeze(child))) { - spin_lock(&child->sighand->siglock); - WARN_ON_ONCE(child->frozen); - child->jobctl |= JOBCTL_TRAP_FREEZE; - spin_unlock(&child->sighand->siglock); + if (!(child->flags & PF_KTHREAD)) { + if (unlikely(test_bit(CGRP_FREEZE, &cgrp_flags))) { + /* + * If the cgroup has to be frozen, the new task has + * too. Let's set the JOBCTL_TRAP_FREEZE jobctl bit to + * get the task into the frozen state. + */ + spin_lock(&child->sighand->siglock); + WARN_ON_ONCE(child->frozen); + child->jobctl |= JOBCTL_TRAP_FREEZE; + spin_unlock(&child->sighand->siglock); + + /* + * Calling cgroup_update_frozen() isn't required here, + * because it will be called anyway a bit later from + * do_freezer_trap(). So we avoid cgroup's transient + * switch from the frozen state and back. + */ + } /* - * Calling cgroup_update_frozen() isn't required here, - * because it will be called anyway a bit later from - * do_freezer_trap(). So we avoid cgroup's transient switch - * from the frozen state and back. + * If the cgroup is to be killed notice it now and take the + * child down right after we finished preparing it for + * userspace. */ + kill = test_bit(CGRP_KILL, &cgrp_flags); } spin_unlock_irq(&css_set_lock); @@ -6143,6 +6238,10 @@ void cgroup_post_fork(struct task_struct *child, put_css_set(rcset); } + /* Cgroup has to be killed so take down child immediately. */ + if (unlikely(kill)) + do_send_sig_info(SIGKILL, SEND_SIG_NOINFO, child, PIDTYPE_TGID); + cgroup_css_set_put_fork(kargs); } -- cgit v1.2.3 From f4f809f66b7545b89bff4b132cdb37adc2d2c157 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 10 May 2021 14:39:46 -0700 Subject: cgroup: inline cgroup_task_freeze() After the introduction of the cgroup.kill there is only one call site of cgroup_task_freeze() left: cgroup_exit(). cgroup_task_freeze() is currently taking rcu_read_lock() to read task's cgroup flags, but because it's always called with css_set_lock locked, the rcu protection is excessive. Simplify the code by inlining cgroup_task_freeze(). v2: fix build Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 18 ------------------ kernel/cgroup/cgroup.c | 3 ++- 2 files changed, 2 insertions(+), 19 deletions(-) (limited to 'kernel/cgroup') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4f2f79de083e..a72764287cb5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -906,20 +906,6 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze); void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, struct cgroup *dst); -static inline bool cgroup_task_freeze(struct task_struct *task) -{ - bool ret; - - if (task->flags & PF_KTHREAD) - return false; - - rcu_read_lock(); - ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags); - rcu_read_unlock(); - - return ret; -} - static inline bool cgroup_task_frozen(struct task_struct *task) { return task->frozen; @@ -929,10 +915,6 @@ static inline bool cgroup_task_frozen(struct task_struct *task) static inline void cgroup_enter_frozen(void) { } static inline void cgroup_leave_frozen(bool always_leave) { } -static inline bool cgroup_task_freeze(struct task_struct *task) -{ - return false; -} static inline bool cgroup_task_frozen(struct task_struct *task) { return false; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e640fc78d731..8e0d7092afbb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6267,7 +6267,8 @@ void cgroup_exit(struct task_struct *tsk) cset->nr_tasks--; WARN_ON_ONCE(cgroup_task_frozen(tsk)); - if (unlikely(cgroup_task_freeze(tsk))) + if (unlikely(!(tsk->flags & PF_KTHREAD) && + test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags))) cgroup_update_frozen(task_dfl_cgroup(tsk)); spin_unlock_irq(&css_set_lock); -- cgit v1.2.3 From 2ca11b0e043be6f5c2b188897e9a32275eaab046 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 26 May 2021 10:49:09 +0800 Subject: cgroup: Fix kernel-doc Fix function name in cgroup.c and rstat.c kernel-doc comment to remove these warnings found by clang_w1. kernel/cgroup/cgroup.c:2401: warning: expecting prototype for cgroup_taskset_migrate(). Prototype was for cgroup_migrate_execute() instead. kernel/cgroup/rstat.c:233: warning: expecting prototype for cgroup_rstat_flush_begin(). Prototype was for cgroup_rstat_flush_hold() instead. Reported-by: Abaci Robot Fixes: 'commit e595cd706982 ("cgroup: track migration context in cgroup_mgctx")' Signed-off-by: Yang Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- kernel/cgroup/rstat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 099d3c663ce3..74e3cc801615 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2389,7 +2389,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, } /** - * cgroup_taskset_migrate - migrate a taskset + * cgroup_migrate_execute - migrate a taskset * @mgctx: migration context * * Migrate tasks in @mgctx as setup by migration preparation functions. diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index cee265cb535c..7f0e58917432 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -220,7 +220,7 @@ void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp) } /** - * cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold + * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold * @cgrp: target cgroup * * Flush stats in @cgrp's subtree and prevent further flushes. Must be -- cgit v1.2.3 From 3958e2d0c34e18c41b60dc01832bd670a59ef70f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 24 May 2021 12:53:39 -0700 Subject: cgroup: make per-cgroup pressure stall tracking configurable PSI accounts stalls for each cgroup separately and aggregates it at each level of the hierarchy. This causes additional overhead with psi_avgs_work being called for each cgroup in the hierarchy. psi_avgs_work has been highly optimized, however on systems with large number of cgroups the overhead becomes noticeable. Systems which use PSI only at the system level could avoid this overhead if PSI can be configured to skip per-cgroup stall accounting. Add "cgroup_disable=pressure" kernel command-line option to allow requesting system-wide only pressure stall accounting. When set, it keeps system-wide accounting under /proc/pressure/ but skips accounting for individual cgroups and does not expose PSI nodes in cgroup hierarchy. Signed-off-by: Suren Baghdasaryan Acked-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- Documentation/admin-guide/kernel-parameters.txt | 9 +++-- include/linux/cgroup-defs.h | 1 + include/linux/cgroup.h | 7 ++++ kernel/cgroup/cgroup.c | 48 +++++++++++++++++++++++++ kernel/sched/psi.c | 30 +++++++++------- 5 files changed, 80 insertions(+), 15 deletions(-) (limited to 'kernel/cgroup') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc46..653c62142f07 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -497,16 +497,21 @@ ccw_timeout_log [S390] See Documentation/s390/common_io.rst for details. - cgroup_disable= [KNL] Disable a particular controller - Format: {name of the controller(s) to disable} + cgroup_disable= [KNL] Disable a particular controller or optional feature + Format: {name of the controller(s) or feature(s) to disable} The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem + - if foo is an optional feature then the feature is + disabled and corresponding cgroup files are not + created {Currently only "memory" controller deal with this and cut the overhead, others just disable the usage. So only cgroup_disable=memory is actually worthy} + Specifying "pressure" disables per-cgroup pressure + stall information accounting feature cgroup_no_v1= [KNL] Disable cgroup controllers and named hierarchies in v1 Format: { { controller | "all" | "named" } diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1a1f3e8faceb..e1c705fdfa7c 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -113,6 +113,7 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ + CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9047fa853dd3..2cc237e3e8b3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -676,6 +676,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) return &cgrp->psi; } +bool cgroup_psi_enabled(void); + static inline void cgroup_init_kthreadd(void) { /* @@ -735,6 +737,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) return NULL; } +static inline bool cgroup_psi_enabled(void) +{ + return false; +} + static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 74e3cc801615..e13a37f4acab 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -209,6 +209,22 @@ struct cgroup_namespace init_cgroup_ns = { static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_base_files[]; +/* cgroup optional features */ +enum cgroup_opt_features { +#ifdef CONFIG_PSI + OPT_FEATURE_PRESSURE, +#endif + OPT_FEATURE_COUNT +}; + +static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = { +#ifdef CONFIG_PSI + "pressure", +#endif +}; + +static u16 cgroup_feature_disable_mask __read_mostly; + static int cgroup_apply_control(struct cgroup *cgrp); static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, @@ -3631,6 +3647,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of) { psi_trigger_replace(&of->priv, NULL); } + +bool cgroup_psi_enabled(void) +{ + return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0; +} + +#else /* CONFIG_PSI */ +bool cgroup_psi_enabled(void) +{ + return false; +} + #endif /* CONFIG_PSI */ static int cgroup_freeze_show(struct seq_file *seq, void *v) @@ -3955,6 +3983,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, restart: for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) @@ -4032,6 +4062,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) WARN_ON(cft->ss || cft->kf_ops); + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; + if (cft->seq_start) kf_ops = &cgroup_kf_ops; else @@ -4945,6 +4978,7 @@ static struct cftype cgroup_base_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -4952,6 +4986,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "memory.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -4959,6 +4994,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cpu.pressure", + .flags = CFTYPE_PRESSURE, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -6313,6 +6349,15 @@ static int __init cgroup_disable(char *str) pr_info("Disabling %s control group subsystem\n", ss->name); } + + for (i = 0; i < OPT_FEATURE_COUNT; i++) { + if (strcmp(token, cgroup_opt_feature_names[i])) + continue; + cgroup_feature_disable_mask |= 1 << i; + pr_info("Disabling %s control group feature\n", + cgroup_opt_feature_names[i]); + break; + } } return 1; } @@ -6611,6 +6656,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf, if (!(cft->flags & CFTYPE_NS_DELEGATABLE)) continue; + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; + if (prefix) ret += snprintf(buf + ret, size - ret, "%s.", prefix); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index cc25a3cff41f..b773cae4c24b 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -148,6 +148,7 @@ static int psi_bug __read_mostly; DEFINE_STATIC_KEY_FALSE(psi_disabled); +DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled); #ifdef CONFIG_PSI_DEFAULT_DISABLED static bool psi_enable; @@ -211,6 +212,9 @@ void __init psi_init(void) return; } + if (!cgroup_psi_enabled()) + static_branch_disable(&psi_cgroups_enabled); + psi_period = jiffies_to_nsecs(PSI_FREQ); group_init(&psi_system); } @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu, static struct psi_group *iterate_groups(struct task_struct *task, void **iter) { + if (*iter == &psi_system) + return NULL; + #ifdef CONFIG_CGROUPS - struct cgroup *cgroup = NULL; + if (static_branch_likely(&psi_cgroups_enabled)) { + struct cgroup *cgroup = NULL; - if (!*iter) - cgroup = task->cgroups->dfl_cgrp; - else if (*iter == &psi_system) - return NULL; - else - cgroup = cgroup_parent(*iter); + if (!*iter) + cgroup = task->cgroups->dfl_cgrp; + else + cgroup = cgroup_parent(*iter); - if (cgroup && cgroup_parent(cgroup)) { - *iter = cgroup; - return cgroup_psi(cgroup); + if (cgroup && cgroup_parent(cgroup)) { + *iter = cgroup; + return cgroup_psi(cgroup); + } } -#else - if (*iter) - return NULL; #endif *iter = &psi_system; return &psi_system; -- cgit v1.2.3