Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  312
1 file changed, 195 insertions(+), 117 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 287933005e11..e50a2db5b4ff 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -687,10 +687,119 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
return mz;
}
-static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
- int event)
+/**
+ * __mod_memcg_state - update cgroup memory statistics
+ * @memcg: the memory cgroup
+ * @idx: the stat item - can be enum memcg_stat_item or enum node_stat_item
+ * @val: delta to add to the counter, can be negative
+ */
+void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
+{
+ long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ struct mem_cgroup *mi;
+
+ atomic_long_add(x, &memcg->vmstats_local[idx]);
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ atomic_long_add(x, &mi->vmstats[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->vmstats_percpu->stat[idx], x);
+}
+
+static struct mem_cgroup_per_node *
+parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
+{
+ struct mem_cgroup *parent;
+
+ parent = parent_mem_cgroup(pn->memcg);
+ if (!parent)
+ return NULL;
+ return mem_cgroup_nodeinfo(parent, nid);
+}
+
+/**
+ * __mod_lruvec_state - update lruvec memory statistics
+ * @lruvec: the lruvec
+ * @idx: the stat item
+ * @val: delta to add to the counter, can be negative
+ *
+ * The lruvec is the intersection of the NUMA node and a cgroup. This
+ * function updates all three counters that are affected by a
+ * change of state at this level: per-node, per-cgroup, per-lruvec.
+ */
+void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val)
{
- return atomic_long_read(&memcg->events[event]);
+ pg_data_t *pgdat = lruvec_pgdat(lruvec);
+ struct mem_cgroup_per_node *pn;
+ struct mem_cgroup *memcg;
+ long x;
+
+ /* Update node */
+ __mod_node_page_state(pgdat, idx, val);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ memcg = pn->memcg;
+
+ /* Update memcg */
+ __mod_memcg_state(memcg, idx, val);
+
+ /* Update lruvec */
+ x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ struct mem_cgroup_per_node *pi;
+
+ atomic_long_add(x, &pn->lruvec_stat_local[idx]);
+ for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
+ atomic_long_add(x, &pi->lruvec_stat[idx]);
+ x = 0;
+ }
+ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
+}
+
+/**
+ * __count_memcg_events - account VM events in a cgroup
+ * @memcg: the memory cgroup
+ * @idx: the event item
+ * @count: the number of events that occurred
+ */
+void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ unsigned long count)
+{
+ unsigned long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
+ if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+ struct mem_cgroup *mi;
+
+ atomic_long_add(x, &memcg->vmevents_local[idx]);
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ atomic_long_add(x, &mi->vmevents[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->vmstats_percpu->events[idx], x);
+}
+
+static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
+{
+ return atomic_long_read(&memcg->vmevents[event]);
+}
+
+static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
+{
+ return atomic_long_read(&memcg->vmevents_local[event]);
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
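[Note] The three updaters added above share one pattern: deltas accumulate in a non-atomic per-cpu counter and only spill into the atomics (the local counter plus every ancestor) once the magnitude exceeds MEMCG_CHARGE_BATCH. Because they use __this_cpu_* accessors, callers must already have interrupts disabled; below is a minimal sketch of the irq-safe wrapper that would sit on top of __mod_memcg_state() — shown for illustration only, assuming the usual static-inline helper in include/linux/memcontrol.h rather than anything added by this hunk:

/* Sketch: irq-safe wrapper; __mod_memcg_state() itself is not irq-safe. */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
				   int idx, int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_memcg_state(memcg, idx, val);
	local_irq_restore(flags);
}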
@@ -722,7 +831,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
nr_pages = -nr_pages; /* for event */
}
- __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
+ __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
}
static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
@@ -730,8 +839,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
{
unsigned long val, next;
- val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
- next = __this_cpu_read(memcg->stat_cpu->targets[target]);
+ val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events);
+ next = __this_cpu_read(memcg->vmstats_percpu->targets[target]);
/* from time_after() in jiffies.h */
if ((long)(next - val) < 0) {
switch (target) {
@@ -747,7 +856,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
default:
break;
}
- __this_cpu_write(memcg->stat_cpu->targets[target], next);
+ __this_cpu_write(memcg->vmstats_percpu->targets[target], next);
return true;
}
return false;
@@ -1325,12 +1434,14 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account)
continue;
pr_cont(" %s:%luKB", memcg1_stat_names[i],
- K(memcg_page_state(iter, memcg1_stats[i])));
+ K(memcg_page_state_local(iter,
+ memcg1_stats[i])));
}
for (i = 0; i < NR_LRU_LISTS; i++)
pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
- K(memcg_page_state(iter, NR_LRU_BASE + i)));
+ K(memcg_page_state_local(iter,
+ NR_LRU_BASE + i)));
pr_cont("\n");
}
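[Note] memcg_page_state_local(), switched to in the hunk above, reads only this cgroup's own counters, while memcg_page_state() now returns the hierarchical total including all descendants. A sketch of that reader pair under this patch's naming, assuming they remain plain atomic reads in include/linux/memcontrol.h:

/* Hierarchical total: this cgroup plus all descendants (vmstats[]). */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
	long x = atomic_long_read(&memcg->vmstats[idx]);
#ifdef CONFIG_SMP
	if (x < 0)	/* per-cpu batching can leave a transient negative */
		x = 0;
#endif
	return x;
}

/* Local value: this cgroup only (vmstats_local[]). */
static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
						   int idx)
{
	long x = atomic_long_read(&memcg->vmstats_local[idx]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}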
@@ -2076,7 +2187,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
static int memcg_hotplug_cpu_dead(unsigned int cpu)
{
struct memcg_stock_pcp *stock;
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *mi;
stock = &per_cpu(memcg_stock, cpu);
drain_stock(stock);
@@ -2088,9 +2199,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
int nid;
long x;
- x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
- if (x)
- atomic_long_add(x, &memcg->stat[i]);
+ x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
+ if (x) {
+ atomic_long_add(x, &memcg->vmstats_local[i]);
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ atomic_long_add(x, &mi->vmstats[i]);
+ }
if (i >= NR_VM_NODE_STAT_ITEMS)
continue;
@@ -2100,17 +2214,24 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
pn = mem_cgroup_nodeinfo(memcg, nid);
x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
- if (x)
- atomic_long_add(x, &pn->lruvec_stat[i]);
+ if (x) {
+ atomic_long_add(x, &pn->lruvec_stat_local[i]);
+ do {
+ atomic_long_add(x, &pn->lruvec_stat[i]);
+ } while ((pn = parent_nodeinfo(pn, nid)));
+ }
}
}
for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
long x;
- x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
- if (x)
- atomic_long_add(x, &memcg->events[i]);
+ x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
+ if (x) {
+ atomic_long_add(x, &memcg->vmevents_local[i]);
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ atomic_long_add(x, &mi->vmevents[i]);
+ }
}
}
@@ -2940,50 +3061,15 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
return retval;
}
-struct accumulated_stats {
- unsigned long stat[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
- unsigned long lru_pages[NR_LRU_LISTS];
- const unsigned int *stats_array;
- const unsigned int *events_array;
- int stats_size;
- int events_size;
-};
-
-static void accumulate_memcg_tree(struct mem_cgroup *memcg,
- struct accumulated_stats *acc)
-{
- struct mem_cgroup *mi;
- int i;
-
- for_each_mem_cgroup_tree(mi, memcg) {
- for (i = 0; i < acc->stats_size; i++)
- acc->stat[i] += memcg_page_state(mi,
- acc->stats_array ? acc->stats_array[i] : i);
-
- for (i = 0; i < acc->events_size; i++)
- acc->events[i] += memcg_sum_events(mi,
- acc->events_array ? acc->events_array[i] : i);
-
- for (i = 0; i < NR_LRU_LISTS; i++)
- acc->lru_pages[i] += memcg_page_state(mi,
- NR_LRU_BASE + i);
- }
-}
-
static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
{
- unsigned long val = 0;
+ unsigned long val;
if (mem_cgroup_is_root(memcg)) {
- struct mem_cgroup *iter;
-
- for_each_mem_cgroup_tree(iter, memcg) {
- val += memcg_page_state(iter, MEMCG_CACHE);
- val += memcg_page_state(iter, MEMCG_RSS);
- if (swap)
- val += memcg_page_state(iter, MEMCG_SWAP);
- }
+ val = memcg_page_state(memcg, MEMCG_CACHE) +
+ memcg_page_state(memcg, MEMCG_RSS);
+ if (swap)
+ val += memcg_page_state(memcg, MEMCG_SWAP);
} else {
if (!swap)
val = page_counter_read(&memcg->memory);
@@ -3324,7 +3410,7 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
for_each_lru(lru) {
if (!(BIT(lru) & lru_mask))
continue;
- nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru);
+ nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
}
return nr;
}
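[Note] lruvec_page_state_local(), used in the hunk above, is the per-node analogue: it returns only this lruvec's own counts, fed from the lruvec_stat_local[] array that __mod_lruvec_state() spills into. A sketch of such a reader, assuming the mem_cgroup_per_node layout used by this patch:

static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
						    enum node_stat_item idx)
{
	struct mem_cgroup_per_node *pn;
	long x;

	/* Without memcg the lruvec belongs to the node itself. */
	if (mem_cgroup_disabled())
		return node_page_state(lruvec_pgdat(lruvec), idx);

	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
	x = atomic_long_read(&pn->lruvec_stat_local[idx]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}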
@@ -3338,7 +3424,7 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
for_each_lru(lru) {
if (!(BIT(lru) & lru_mask))
continue;
- nr += memcg_page_state(memcg, NR_LRU_BASE + lru);
+ nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru);
}
return nr;
}
@@ -3414,7 +3500,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
unsigned long memory, memsw;
struct mem_cgroup *mi;
unsigned int i;
- struct accumulated_stats acc;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
@@ -3423,17 +3508,17 @@ static int memcg_stat_show(struct seq_file *m, void *v)
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
- memcg_page_state(memcg, memcg1_stats[i]) *
+ memcg_page_state_local(memcg, memcg1_stats[i]) *
PAGE_SIZE);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
seq_printf(m, "%s %lu\n", memcg1_event_names[i],
- memcg_sum_events(memcg, memcg1_events[i]));
+ memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
- memcg_page_state(memcg, NR_LRU_BASE + i) *
+ memcg_page_state_local(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
/* Hierarchical information */
@@ -3448,27 +3533,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
seq_printf(m, "hierarchical_memsw_limit %llu\n",
(u64)memsw * PAGE_SIZE);
- memset(&acc, 0, sizeof(acc));
- acc.stats_size = ARRAY_SIZE(memcg1_stats);
- acc.stats_array = memcg1_stats;
- acc.events_size = ARRAY_SIZE(memcg1_events);
- acc.events_array = memcg1_events;
- accumulate_memcg_tree(memcg, &acc);
-
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)acc.stat[i] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, i) * PAGE_SIZE);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
- (u64)acc.events[i]);
+ (u64)memcg_events(memcg, i));
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
- (u64)acc.lru_pages[i] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);
#ifdef CONFIG_DEBUG_VM
{
@@ -3901,11 +3980,11 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
*/
static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
{
- long x = atomic_long_read(&memcg->stat[idx]);
+ long x = atomic_long_read(&memcg->vmstats[idx]);
int cpu;
for_each_online_cpu(cpu)
- x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ x += per_cpu_ptr(memcg->vmstats_percpu, cpu)->stat[idx];
if (x < 0)
x = 0;
return x;
@@ -4445,7 +4524,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
- free_percpu(memcg->stat_cpu);
+ free_percpu(memcg->vmstats_percpu);
kfree(memcg);
}
@@ -4474,8 +4553,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg->id.id < 0)
goto fail;
- memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
- if (!memcg->stat_cpu)
+ memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
+ if (!memcg->vmstats_percpu)
goto fail;
for_each_node(node)
@@ -5561,7 +5640,6 @@ static int memory_events_show(struct seq_file *m, void *v)
static int memory_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- struct accumulated_stats acc;
int i;
/*
@@ -5575,31 +5653,27 @@ static int memory_stat_show(struct seq_file *m, void *v)
* Current memory state:
*/
- memset(&acc, 0, sizeof(acc));
- acc.stats_size = MEMCG_NR_STAT;
- acc.events_size = NR_VM_EVENT_ITEMS;
- accumulate_memcg_tree(memcg, &acc);
-
seq_printf(m, "anon %llu\n",
- (u64)acc.stat[MEMCG_RSS] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, MEMCG_RSS) * PAGE_SIZE);
seq_printf(m, "file %llu\n",
- (u64)acc.stat[MEMCG_CACHE] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, MEMCG_CACHE) * PAGE_SIZE);
seq_printf(m, "kernel_stack %llu\n",
- (u64)acc.stat[MEMCG_KERNEL_STACK_KB] * 1024);
+ (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * 1024);
seq_printf(m, "slab %llu\n",
- (u64)(acc.stat[NR_SLAB_RECLAIMABLE] +
- acc.stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+ (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) +
+ memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) *
+ PAGE_SIZE);
seq_printf(m, "sock %llu\n",
- (u64)acc.stat[MEMCG_SOCK] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE);
seq_printf(m, "shmem %llu\n",
- (u64)acc.stat[NR_SHMEM] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_SHMEM) * PAGE_SIZE);
seq_printf(m, "file_mapped %llu\n",
- (u64)acc.stat[NR_FILE_MAPPED] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_FILE_MAPPED) * PAGE_SIZE);
seq_printf(m, "file_dirty %llu\n",
- (u64)acc.stat[NR_FILE_DIRTY] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_FILE_DIRTY) * PAGE_SIZE);
seq_printf(m, "file_writeback %llu\n",
- (u64)acc.stat[NR_WRITEBACK] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_WRITEBACK) * PAGE_SIZE);
/*
* TODO: We should eventually replace our own MEMCG_RSS_HUGE counter
@@ -5608,43 +5682,47 @@ static int memory_stat_show(struct seq_file *m, void *v)
* where the page->mem_cgroup is set up and stable.
*/
seq_printf(m, "anon_thp %llu\n",
- (u64)acc.stat[MEMCG_RSS_HUGE] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) * PAGE_SIZE);
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i],
- (u64)acc.lru_pages[i] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);
seq_printf(m, "slab_reclaimable %llu\n",
- (u64)acc.stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) *
+ PAGE_SIZE);
seq_printf(m, "slab_unreclaimable %llu\n",
- (u64)acc.stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+ (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) *
+ PAGE_SIZE);
/* Accumulated memory events */
- seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]);
- seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]);
+ seq_printf(m, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
+ seq_printf(m, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
seq_printf(m, "workingset_refault %lu\n",
- acc.stat[WORKINGSET_REFAULT]);
+ memcg_page_state(memcg, WORKINGSET_REFAULT));
seq_printf(m, "workingset_activate %lu\n",
- acc.stat[WORKINGSET_ACTIVATE]);
+ memcg_page_state(memcg, WORKINGSET_ACTIVATE));
seq_printf(m, "workingset_nodereclaim %lu\n",
- acc.stat[WORKINGSET_NODERECLAIM]);
-
- seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]);
- seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] +
- acc.events[PGSCAN_DIRECT]);
- seq_printf(m, "pgsteal %lu\n", acc.events[PGSTEAL_KSWAPD] +
- acc.events[PGSTEAL_DIRECT]);
- seq_printf(m, "pgactivate %lu\n", acc.events[PGACTIVATE]);
- seq_printf(m, "pgdeactivate %lu\n", acc.events[PGDEACTIVATE]);
- seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]);
- seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]);
+ memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
+
+ seq_printf(m, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
+ seq_printf(m, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) +
+ memcg_events(memcg, PGSCAN_DIRECT));
+ seq_printf(m, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) +
+ memcg_events(memcg, PGSTEAL_DIRECT));
+ seq_printf(m, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
+ seq_printf(m, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
+ seq_printf(m, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
+ seq_printf(m, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- seq_printf(m, "thp_fault_alloc %lu\n", acc.events[THP_FAULT_ALLOC]);
+ seq_printf(m, "thp_fault_alloc %lu\n",
+ memcg_events(memcg, THP_FAULT_ALLOC));
seq_printf(m, "thp_collapse_alloc %lu\n",
- acc.events[THP_COLLAPSE_ALLOC]);
+ memcg_events(memcg, THP_COLLAPSE_ALLOC));
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
return 0;
@@ -6080,7 +6158,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
__mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
- __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
+ __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, nr_pages);
memcg_check_events(ug->memcg, ug->dummy_page);
local_irq_restore(flags);