diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 261 |
1 files changed, 129 insertions, 132 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d18d3a6e7337..14c2f2017e37 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -14,6 +14,12 @@ * Copyright (C) 2012 Parallels Inc. and Google Inc. * Authors: Glauber Costa and Suleiman Souhlal * + * Native page reclaim + * Charge lifetime sanitation + * Lockless page tracking & accounting + * Unified hierarchy configuration model + * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -253,11 +259,6 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, * to help the administrator determine what knobs to tune. - * - * TODO: Add a water mark for the memory controller. Reclaim will begin when - * we hit the water mark. May be even add a low water mark, such that - * no reclaim occurs from a cgroup at it's low water mark, this is - * a feature that will be implemented much later in the future. */ struct mem_cgroup { struct cgroup_subsys_state css; @@ -454,6 +455,12 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) return memcg->css.id; } +/* + * A helper function to get mem_cgroup from ID. must be called under + * rcu_read_lock(). The caller is responsible for calling + * css_tryget_online() if the mem_cgroup is used for charging. (dropping + * refcnt from swap can be called against removed memcg.) + */ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { struct cgroup_subsys_state *css; @@ -667,7 +674,7 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, static unsigned long soft_limit_excess(struct mem_cgroup *memcg) { unsigned long nr_pages = page_counter_read(&memcg->memory); - unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit); + unsigned long soft_limit = READ_ONCE(memcg->soft_limit); unsigned long excess = 0; if (nr_pages > soft_limit) @@ -1035,7 +1042,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, goto out_unlock; do { - pos = ACCESS_ONCE(iter->position); + pos = READ_ONCE(iter->position); /* * A racing update may change the position and * put the last reference, hence css_tryget(), @@ -1352,13 +1359,13 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) unsigned long limit; count = page_counter_read(&memcg->memory); - limit = ACCESS_ONCE(memcg->memory.limit); + limit = READ_ONCE(memcg->memory.limit); if (count < limit) margin = limit - count; if (do_swap_account) { count = page_counter_read(&memcg->memsw); - limit = ACCESS_ONCE(memcg->memsw.limit); + limit = READ_ONCE(memcg->memsw.limit); if (count <= limit) margin = min(margin, limit - count); } @@ -1436,15 +1443,17 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) struct mem_cgroup *iter; unsigned int i; - if (!p) - return; - mutex_lock(&oom_info_lock); rcu_read_lock(); - pr_info("Task in "); - pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); - pr_cont(" killed as a result of limit of "); + if (p) { + pr_info("Task in "); + pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); + pr_cont(" killed as a result of limit of "); + } else { + pr_info("Memory limit reached of cgroup "); + } + pr_cont_cgroup_path(memcg->css.cgroup); pr_cont("\n"); @@ -1531,7 +1540,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, return; } - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); + check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg); totalpages = mem_cgroup_get_limit(memcg) ? : 1; for_each_mem_cgroup_tree(iter, memcg) { struct css_task_iter it; @@ -2341,20 +2350,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) } /* - * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling - * css_tryget_online() if the mem_cgroup is used for charging. (dropping - * refcnt from swap can be called against removed memcg.) - */ -static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) -{ - /* ID 0 is unused ID */ - if (!id) - return NULL; - return mem_cgroup_from_id(id); -} - -/* * try_get_mem_cgroup_from_page - look up page's memcg association * @page: the page * @@ -2380,7 +2375,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) ent.val = page_private(page); id = lookup_swap_cgroup_id(ent); rcu_read_lock(); - memcg = mem_cgroup_lookup(id); + memcg = mem_cgroup_from_id(id); if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; rcu_read_unlock(); @@ -2642,7 +2637,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) return cachep; memcg = get_mem_cgroup_from_mm(current->mm); - kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id); + kmemcg_id = READ_ONCE(memcg->kmemcg_id); if (kmemcg_id < 0) goto out; @@ -2779,92 +2774,6 @@ void mem_cgroup_split_huge_fixup(struct page *head) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/** - * mem_cgroup_move_account - move account of the page - * @page: the page - * @nr_pages: number of regular pages (>1 for huge pages) - * @from: mem_cgroup which the page is moved from. - * @to: mem_cgroup which the page is moved to. @from != @to. - * - * The caller must confirm following. - * - page is not on LRU (isolate_page() is useful.) - * - compound_lock is held when nr_pages > 1 - * - * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" - * from old cgroup. - */ -static int mem_cgroup_move_account(struct page *page, - unsigned int nr_pages, - struct mem_cgroup *from, - struct mem_cgroup *to) -{ - unsigned long flags; - int ret; - - VM_BUG_ON(from == to); - VM_BUG_ON_PAGE(PageLRU(page), page); - /* - * The page is isolated from LRU. So, collapse function - * will not handle this page. But page splitting can happen. - * Do this check under compound_page_lock(). The caller should - * hold it. - */ - ret = -EBUSY; - if (nr_pages > 1 && !PageTransHuge(page)) - goto out; - - /* - * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup - * of its source page while we change it: page migration takes - * both pages off the LRU, but page cache replacement doesn't. - */ - if (!trylock_page(page)) - goto out; - - ret = -EINVAL; - if (page->mem_cgroup != from) - goto out_unlock; - - spin_lock_irqsave(&from->move_lock, flags); - - if (!PageAnon(page) && page_mapped(page)) { - __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], - nr_pages); - __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], - nr_pages); - } - - if (PageWriteback(page)) { - __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], - nr_pages); - __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], - nr_pages); - } - - /* - * It is safe to change page->mem_cgroup here because the page - * is referenced, charged, and isolated - we can't race with - * uncharging, charging, migration, or LRU putback. - */ - - /* caller should have done css_get */ - page->mem_cgroup = to; - spin_unlock_irqrestore(&from->move_lock, flags); - - ret = 0; - - local_irq_disable(); - mem_cgroup_charge_statistics(to, page, nr_pages); - memcg_check_events(to, page); - mem_cgroup_charge_statistics(from, page, -nr_pages); - memcg_check_events(from, page); - local_irq_enable(); -out_unlock: - unlock_page(page); -out: - return ret; -} - #ifdef CONFIG_MEMCG_SWAP static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, bool charge) @@ -4816,6 +4725,92 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, return page; } +/** + * mem_cgroup_move_account - move account of the page + * @page: the page + * @nr_pages: number of regular pages (>1 for huge pages) + * @from: mem_cgroup which the page is moved from. + * @to: mem_cgroup which the page is moved to. @from != @to. + * + * The caller must confirm following. + * - page is not on LRU (isolate_page() is useful.) + * - compound_lock is held when nr_pages > 1 + * + * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" + * from old cgroup. + */ +static int mem_cgroup_move_account(struct page *page, + unsigned int nr_pages, + struct mem_cgroup *from, + struct mem_cgroup *to) +{ + unsigned long flags; + int ret; + + VM_BUG_ON(from == to); + VM_BUG_ON_PAGE(PageLRU(page), page); + /* + * The page is isolated from LRU. So, collapse function + * will not handle this page. But page splitting can happen. + * Do this check under compound_page_lock(). The caller should + * hold it. + */ + ret = -EBUSY; + if (nr_pages > 1 && !PageTransHuge(page)) + goto out; + + /* + * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup + * of its source page while we change it: page migration takes + * both pages off the LRU, but page cache replacement doesn't. + */ + if (!trylock_page(page)) + goto out; + + ret = -EINVAL; + if (page->mem_cgroup != from) + goto out_unlock; + + spin_lock_irqsave(&from->move_lock, flags); + + if (!PageAnon(page) && page_mapped(page)) { + __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], + nr_pages); + __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], + nr_pages); + } + + if (PageWriteback(page)) { + __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], + nr_pages); + __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], + nr_pages); + } + + /* + * It is safe to change page->mem_cgroup here because the page + * is referenced, charged, and isolated - we can't race with + * uncharging, charging, migration, or LRU putback. + */ + + /* caller should have done css_get */ + page->mem_cgroup = to; + spin_unlock_irqrestore(&from->move_lock, flags); + + ret = 0; + + local_irq_disable(); + mem_cgroup_charge_statistics(to, page, nr_pages); + memcg_check_events(to, page); + mem_cgroup_charge_statistics(from, page, -nr_pages); + memcg_check_events(from, page); + local_irq_enable(); +out_unlock: + unlock_page(page); +out: + return ret; +} + static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, unsigned long addr, pte_t ptent, union mc_target *target) { @@ -5012,7 +5007,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, * tunable will only affect upcoming migrations, not the current one. * So we need to save it, and keep it going. */ - move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate); + move_flags = READ_ONCE(memcg->move_charge_at_immigrate); if (move_flags) { struct mm_struct *mm; struct mem_cgroup *from = mem_cgroup_from_task(p); @@ -5232,7 +5227,9 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) * on for the root memcg is enough. */ if (cgroup_on_dfl(root_css->cgroup)) - mem_cgroup_from_css(root_css)->use_hierarchy = true; + root_mem_cgroup->use_hierarchy = true; + else + root_mem_cgroup->use_hierarchy = false; } static u64 memory_current_read(struct cgroup_subsys_state *css, @@ -5244,10 +5241,10 @@ static u64 memory_current_read(struct cgroup_subsys_state *css, static int memory_low_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - unsigned long low = ACCESS_ONCE(memcg->low); + unsigned long low = READ_ONCE(memcg->low); if (low == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE); @@ -5262,7 +5259,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &low); + err = page_counter_memparse(buf, "max", &low); if (err) return err; @@ -5274,10 +5271,10 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, static int memory_high_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - unsigned long high = ACCESS_ONCE(memcg->high); + unsigned long high = READ_ONCE(memcg->high); if (high == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE); @@ -5292,7 +5289,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &high); + err = page_counter_memparse(buf, "max", &high); if (err) return err; @@ -5304,10 +5301,10 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, static int memory_max_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - unsigned long max = ACCESS_ONCE(memcg->memory.limit); + unsigned long max = READ_ONCE(memcg->memory.limit); if (max == PAGE_COUNTER_MAX) - seq_puts(m, "infinity\n"); + seq_puts(m, "max\n"); else seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE); @@ -5322,7 +5319,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, int err; buf = strstrip(buf); - err = page_counter_memparse(buf, "infinity", &max); + err = page_counter_memparse(buf, "max", &max); if (err) return err; @@ -5426,7 +5423,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) if (memcg == root_mem_cgroup) return false; - if (page_counter_read(&memcg->memory) > memcg->low) + if (page_counter_read(&memcg->memory) >= memcg->low) return false; while (memcg != root) { @@ -5435,7 +5432,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) if (memcg == root_mem_cgroup) break; - if (page_counter_read(&memcg->memory) > memcg->low) + if (page_counter_read(&memcg->memory) >= memcg->low) return false; } return true; @@ -5859,7 +5856,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) id = swap_cgroup_record(entry, 0); rcu_read_lock(); - memcg = mem_cgroup_lookup(id); + memcg = mem_cgroup_from_id(id); if (memcg) { if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memsw, 1); |