summaryrefslogtreecommitdiffstats
path: root/kernel/cgroup/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup/cgroup.c')
-rw-r--r--kernel/cgroup/cgroup.c107
1 files changed, 75 insertions, 32 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a662bfcbea0e..acb66713f9b6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -54,6 +54,7 @@
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/file.h>
+#include <linux/sched/cputime.h>
#include <net/sock.h>
#define CREATE_TRACE_POINTS
@@ -61,6 +62,8 @@
#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
MAX_CFTYPE_NAME + 2)
+/* let's not notify more than 100 times per second */
+#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
@@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
};
#undef SUBSYS
-static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
+static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
/*
* The default hierarchy, reserved for the subsystems that are otherwise
* unattached - it never has more than a single cgroup, and all tasks are
* part of that cgroup.
*/
-struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);
/*
@@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
+
+ del_timer_sync(&cfile->notify_timer);
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
@@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
css->flags &= ~CSS_VISIBLE;
- list_for_each_entry(cfts, &css->ss->cfts, node)
+ if (!css->ss) {
+ if (cgroup_on_dfl(cgrp))
+ cfts = cgroup_base_files;
+ else
+ cfts = cgroup1_base_files;
+
cgroup_addrm_files(css, cgrp, cfts, false);
+ } else {
+ list_for_each_entry(cfts, &css->ss->cfts, node)
+ cgroup_addrm_files(css, cgrp, cfts, false);
+ }
}
/**
@@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
else
cfts = cgroup1_base_files;
- return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
- }
-
- list_for_each_entry(cfts, &css->ss->cfts, node) {
- ret = cgroup_addrm_files(css, cgrp, cfts, true);
- if (ret < 0) {
- failed_cfts = cfts;
- goto err;
+ ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
+ if (ret < 0)
+ return ret;
+ } else {
+ list_for_each_entry(cfts, &css->ss->cfts, node) {
+ ret = cgroup_addrm_files(css, cgrp, cfts, true);
+ if (ret < 0) {
+ failed_cfts = cfts;
+ goto err;
+ }
}
}
@@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
- spin_lock_irq(&css_set_lock);
-
- if (use_task_css_set_links)
- goto out_unlock;
-
- use_task_css_set_links = true;
-
/*
* We need tasklist_lock because RCU is not safe against
* while_each_thread(). Besides, a forking task that has passed
@@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void)
* tasklist if we walk through it with RCU.
*/
read_lock(&tasklist_lock);
+ spin_lock_irq(&css_set_lock);
+
+ if (use_task_css_set_links)
+ goto out_unlock;
+
+ use_task_css_set_links = true;
+
do_each_thread(g, p) {
WARN_ON_ONCE(!list_empty(&p->cg_list) ||
task_css_set(p) != &init_css_set);
@@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void)
}
spin_unlock(&p->sighand->siglock);
} while_each_thread(g, p);
- read_unlock(&tasklist_lock);
out_unlock:
spin_unlock_irq(&css_set_lock);
+ read_unlock(&tasklist_lock);
}
static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
cgrp->dom_cgrp = cgrp;
cgrp->max_descendants = INT_MAX;
cgrp->max_depth = INT_MAX;
+ INIT_LIST_HEAD(&cgrp->rstat_css_list);
+ prev_cputime_init(&cgrp->prev_cputime);
for_each_subsys(ss, ssid)
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
int ret = 0;
- cgroup_stat_show_cputime(seq);
+ cgroup_base_stat_cputime_show(seq);
#ifdef CONFIG_CGROUP_SCHED
ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
#endif
@@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn)
return kernfs_setattr(kn, &iattr);
}
+static void cgroup_file_notify_timer(struct timer_list *timer)
+{
+ cgroup_file_notify(container_of(timer, struct cgroup_file,
+ notify_timer));
+}
+
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
struct cftype *cft)
{
@@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;
+ timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
+
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
spin_unlock_irq(&cgroup_file_kn_lock);
@@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile)
unsigned long flags;
spin_lock_irqsave(&cgroup_file_kn_lock, flags);
- if (cfile->kn)
- kernfs_notify(cfile->kn);
+ if (cfile->kn) {
+ unsigned long last = cfile->notified_at;
+ unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
+
+ if (time_in_range(jiffies, last, next)) {
+ timer_reduce(&cfile->notify_timer, next);
+ } else {
+ kernfs_notify(cfile->kn);
+ cfile->notified_at = jiffies;
+ }
+ }
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}
@@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work)
cgroup_put(cgroup_parent(cgrp));
kernfs_put(cgrp->kn);
if (cgroup_on_dfl(cgrp))
- cgroup_stat_exit(cgrp);
+ cgroup_rstat_exit(cgrp);
kfree(cgrp);
} else {
/*
@@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work)
if (ss) {
/* css release path */
+ if (!list_empty(&css->rstat_css_node)) {
+ cgroup_rstat_flush(cgrp);
+ list_del_rcu(&css->rstat_css_node);
+ }
+
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
if (ss->css_released)
ss->css_released(css);
@@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
trace_cgroup_release(cgrp);
if (cgroup_on_dfl(cgrp))
- cgroup_stat_flush(cgrp);
+ cgroup_rstat_flush(cgrp);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
tcgrp = cgroup_parent(tcgrp))
@@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->id = -1;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
+ INIT_LIST_HEAD(&css->rstat_css_node);
css->serial_nr = css_serial_nr_next++;
atomic_set(&css->online_cnt, 0);
@@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css_get(css->parent);
}
+ if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
+ list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
+
BUG_ON(cgroup_css(cgrp, ss));
}
@@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
err_list_del:
list_del_rcu(&css->sibling);
err_free_css:
+ list_del_rcu(&css->rstat_css_node);
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
return ERR_PTR(err);
@@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
goto out_free_cgrp;
if (cgroup_on_dfl(parent)) {
- ret = cgroup_stat_init(cgrp);
+ ret = cgroup_rstat_init(cgrp);
if (ret)
goto out_cancel_ref;
}
@@ -4850,7 +4895,7 @@ out_idr_free:
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_stat_exit:
if (cgroup_on_dfl(parent))
- cgroup_stat_exit(cgrp);
+ cgroup_rstat_exit(cgrp);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
@@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
for_each_css(css, ssid, cgrp)
kill_css(css);
- /*
- * Remove @cgrp directory along with the base files. @cgrp has an
- * extra ref on its kn.
- */
+ /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
+ css_clear_dir(&cgrp->self);
kernfs_remove(cgrp->kn);
if (parent && cgroup_is_threaded(cgrp))
@@ -5245,7 +5288,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
- cgroup_stat_boot();
+ cgroup_rstat_boot();
/*
* The latency of the synchronize_sched() is too high for cgroups,
@@ -5335,7 +5378,7 @@ int __init cgroup_init(void)
WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
- WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
+ WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
return 0;
}