diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 532 |
1 file changed, 188 insertions, 344 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f973a025569b..5381afb23d58 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -83,9 +83,9 @@ static bool cgroup_memory_nokmem; /* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP -int do_swap_account __read_mostly; +bool cgroup_memory_noswap __read_mostly; #else -#define do_swap_account 0 +#define cgroup_memory_noswap 1 #endif #ifdef CONFIG_CGROUP_WRITEBACK @@ -95,7 +95,7 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { - return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account; + return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap; } #define THRESHOLDS_EVENTS_TARGET 128 @@ -834,25 +834,8 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, struct page *page, - bool compound, int nr_pages) + int nr_pages) { - /* - * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is - * counted as CACHE even if it's on ANON LRU. - */ - if (PageAnon(page)) - __mod_memcg_state(memcg, MEMCG_RSS, nr_pages); - else { - __mod_memcg_state(memcg, MEMCG_CACHE, nr_pages); - if (PageSwapBacked(page)) - __mod_memcg_state(memcg, NR_SHMEM, nr_pages); - } - - if (compound) { - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - __mod_memcg_state(memcg, MEMCG_RSS_HUGE, nr_pages); - } - /* pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) __count_memcg_events(memcg, PGPGIN, 1); @@ -1218,9 +1201,8 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, * @page: the page * @pgdat: pgdat of the page * - * This function is only safe when following the LRU page isolation - * and putback protocol: the LRU lock must be held, and the page must - * either be PageLRU() or the caller must have isolated/allocated it. 
+ * This function relies on page->mem_cgroup being stable - see the + * access rules in commit_charge(). */ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) { @@ -1389,10 +1371,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg) */ seq_buf_printf(&s, "anon %llu\n", - (u64)memcg_page_state(memcg, MEMCG_RSS) * + (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * PAGE_SIZE); seq_buf_printf(&s, "file %llu\n", - (u64)memcg_page_state(memcg, MEMCG_CACHE) * + (u64)memcg_page_state(memcg, NR_FILE_PAGES) * PAGE_SIZE); seq_buf_printf(&s, "kernel_stack %llu\n", (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * @@ -1418,15 +1400,11 @@ static char *memory_stat_format(struct mem_cgroup *memcg) (u64)memcg_page_state(memcg, NR_WRITEBACK) * PAGE_SIZE); - /* - * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter - * with the NR_ANON_THP vm counter, but right now it's a pain in the - * arse because it requires migrating the work out of rmap to a place - * where the page->mem_cgroup is set up and stable. 
- */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE seq_buf_printf(&s, "anon_thp %llu\n", - (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) * - PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_ANON_THPS) * + HPAGE_PMD_SIZE); +#endif for (i = 0; i < NR_LRU_LISTS; i++) seq_buf_printf(&s, "%s %llu\n", lru_list_name(i), @@ -1981,6 +1959,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) */ struct mem_cgroup *lock_page_memcg(struct page *page) { + struct page *head = compound_head(page); /* rmap on tail pages */ struct mem_cgroup *memcg; unsigned long flags; @@ -2000,7 +1979,7 @@ struct mem_cgroup *lock_page_memcg(struct page *page) if (mem_cgroup_disabled()) return NULL; again: - memcg = page->mem_cgroup; + memcg = head->mem_cgroup; if (unlikely(!memcg)) return NULL; @@ -2008,7 +1987,7 @@ again: return memcg; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != page->mem_cgroup) { + if (memcg != head->mem_cgroup) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -2051,7 +2030,9 @@ void __unlock_page_memcg(struct mem_cgroup *memcg) */ void unlock_page_memcg(struct page *page) { - __unlock_page_memcg(page->mem_cgroup); + struct page *head = compound_head(page); + + __unlock_page_memcg(head->mem_cgroup); } EXPORT_SYMBOL(unlock_page_memcg); @@ -2659,6 +2640,7 @@ done_restock: return 0; } +#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU) static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { if (mem_cgroup_is_root(memcg)) @@ -2670,70 +2652,20 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) css_put_many(&memcg->css, nr_pages); } +#endif -static void lock_page_lru(struct page *page, int *isolated) -{ - pg_data_t *pgdat = page_pgdat(page); - - spin_lock_irq(&pgdat->lru_lock); - if (PageLRU(page)) { - struct lruvec *lruvec; - - lruvec = mem_cgroup_page_lruvec(page, pgdat); - ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_lru(page)); - *isolated = 1; - } else - *isolated = 0; -} - 
-static void unlock_page_lru(struct page *page, int isolated) -{ - pg_data_t *pgdat = page_pgdat(page); - - if (isolated) { - struct lruvec *lruvec; - - lruvec = mem_cgroup_page_lruvec(page, pgdat); - VM_BUG_ON_PAGE(PageLRU(page), page); - SetPageLRU(page); - add_page_to_lru_list(page, lruvec, page_lru(page)); - } - spin_unlock_irq(&pgdat->lru_lock); -} - -static void commit_charge(struct page *page, struct mem_cgroup *memcg, - bool lrucare) +static void commit_charge(struct page *page, struct mem_cgroup *memcg) { - int isolated; - VM_BUG_ON_PAGE(page->mem_cgroup, page); - /* - * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page - * may already be on some other mem_cgroup's LRU. Take care of it. - */ - if (lrucare) - lock_page_lru(page, &isolated); - - /* - * Nobody should be changing or seriously looking at - * page->mem_cgroup at this point: - * - * - the page is uncharged + * Any of the following ensures page->mem_cgroup stability: * - * - the page is off-LRU - * - * - an anonymous fault has exclusive page access, except for - * a locked page table - * - * - a page cache insertion, a swapin fault, or a migration - * have the page locked + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference */ page->mem_cgroup = memcg; - - if (lrucare) - unlock_page_lru(page, isolated); } #ifdef CONFIG_MEMCG_KMEM @@ -3070,8 +3002,6 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) head[i].mem_cgroup = head->mem_cgroup; - - __mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, -HPAGE_PMD_NR); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -3354,8 +3284,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) unsigned long val; if (mem_cgroup_is_root(memcg)) { - val = memcg_page_state(memcg, MEMCG_CACHE) + - memcg_page_state(memcg, MEMCG_RSS); + val = memcg_page_state(memcg, NR_FILE_PAGES) + + memcg_page_state(memcg, NR_ANON_MAPPED); if (swap) val += 
memcg_page_state(memcg, MEMCG_SWAP); } else { @@ -3743,7 +3673,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid, unsigned int lru_mask) + int nid, unsigned int lru_mask, bool tree) { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); unsigned long nr = 0; @@ -3754,13 +3684,17 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); + if (tree) + nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru); + else + nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); } return nr; } static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, - unsigned int lru_mask) + unsigned int lru_mask, + bool tree) { unsigned long nr = 0; enum lru_list lru; @@ -3768,7 +3702,10 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); + if (tree) + nr += memcg_page_state(memcg, NR_LRU_BASE + lru); + else + nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); } return nr; } @@ -3788,34 +3725,28 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) }; const struct numa_stat *stat; int nid; - unsigned long nr; struct mem_cgroup *memcg = mem_cgroup_from_seq(m); for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { - nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); - seq_printf(m, "%s=%lu", stat->name, nr); - for_each_node_state(nid, N_MEMORY) { - nr = mem_cgroup_node_nr_lru_pages(memcg, nid, - stat->lru_mask); - seq_printf(m, " N%d=%lu", nid, nr); - } + seq_printf(m, "%s=%lu", stat->name, + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, + false)); + for_each_node_state(nid, N_MEMORY) + seq_printf(m, " N%d=%lu", nid, 
+ mem_cgroup_node_nr_lru_pages(memcg, nid, + stat->lru_mask, false)); seq_putc(m, '\n'); } for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { - struct mem_cgroup *iter; - - nr = 0; - for_each_mem_cgroup_tree(iter, memcg) - nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask); - seq_printf(m, "hierarchical_%s=%lu", stat->name, nr); - for_each_node_state(nid, N_MEMORY) { - nr = 0; - for_each_mem_cgroup_tree(iter, memcg) - nr += mem_cgroup_node_nr_lru_pages( - iter, nid, stat->lru_mask); - seq_printf(m, " N%d=%lu", nid, nr); - } + + seq_printf(m, "hierarchical_%s=%lu", stat->name, + mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, + true)); + for_each_node_state(nid, N_MEMORY) + seq_printf(m, " N%d=%lu", nid, + mem_cgroup_node_nr_lru_pages(memcg, nid, + stat->lru_mask, true)); seq_putc(m, '\n'); } @@ -3824,9 +3755,11 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) #endif /* CONFIG_NUMA */ static const unsigned int memcg1_stats[] = { - MEMCG_CACHE, - MEMCG_RSS, - MEMCG_RSS_HUGE, + NR_FILE_PAGES, + NR_ANON_MAPPED, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + NR_ANON_THPS, +#endif NR_SHMEM, NR_FILE_MAPPED, NR_FILE_DIRTY, @@ -3837,7 +3770,9 @@ static const unsigned int memcg1_stats[] = { static const char *const memcg1_stat_names[] = { "cache", "rss", +#ifdef CONFIG_TRANSPARENT_HUGEPAGE "rss_huge", +#endif "shmem", "mapped_file", "dirty", @@ -3863,11 +3798,16 @@ static int memcg_stat_show(struct seq_file *m, void *v) BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { + unsigned long nr; + if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; - seq_printf(m, "%s %lu\n", memcg1_stat_names[i], - memcg_page_state_local(memcg, memcg1_stats[i]) * - PAGE_SIZE); + nr = memcg_page_state_local(memcg, memcg1_stats[i]); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (memcg1_stats[i] == NR_ANON_THPS) + nr *= HPAGE_PMD_NR; +#endif + seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * 
PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) @@ -3913,23 +3853,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) { pg_data_t *pgdat; struct mem_cgroup_per_node *mz; - struct zone_reclaim_stat *rstat; - unsigned long recent_rotated[2] = {0, 0}; - unsigned long recent_scanned[2] = {0, 0}; + unsigned long anon_cost = 0; + unsigned long file_cost = 0; for_each_online_pgdat(pgdat) { mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); - rstat = &mz->lruvec.reclaim_stat; - recent_rotated[0] += rstat->recent_rotated[0]; - recent_rotated[1] += rstat->recent_rotated[1]; - recent_scanned[0] += rstat->recent_scanned[0]; - recent_scanned[1] += rstat->recent_scanned[1]; + anon_cost += mz->lruvec.anon_cost; + file_cost += mz->lruvec.file_cost; } - seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); - seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); - seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); - seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); + seq_printf(m, "anon_cost %lu\n", anon_cost); + seq_printf(m, "file_cost %lu\n", file_cost); } #endif @@ -5364,8 +5298,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, * we call find_get_page() with swapper_space directly. */ page = find_get_page(swap_address_space(ent), swp_offset(ent)); - if (do_memsw_account()) - entry->val = ent.val; + entry->val = ent.val; return page; } @@ -5399,8 +5332,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, page = find_get_entry(mapping, pgoff); if (xa_is_value(page)) { swp_entry_t swp = radix_to_swp_entry(page); - if (do_memsw_account()) - *entry = swp; + *entry = swp; page = find_get_page(swap_address_space(swp), swp_offset(swp)); } @@ -5431,10 +5363,8 @@ static int mem_cgroup_move_account(struct page *page, { struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned long flags; unsigned int nr_pages = compound ? 
hpage_nr_pages(page) : 1; int ret; - bool anon; VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -5452,30 +5382,47 @@ static int mem_cgroup_move_account(struct page *page, if (page->mem_cgroup != from) goto out_unlock; - anon = PageAnon(page); - pgdat = page_pgdat(page); from_vec = mem_cgroup_lruvec(from, pgdat); to_vec = mem_cgroup_lruvec(to, pgdat); - spin_lock_irqsave(&from->move_lock, flags); + lock_page_memcg(page); - if (!anon && page_mapped(page)) { - __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); - __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); - } + if (PageAnon(page)) { + if (page_mapped(page)) { + __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); + __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); + if (PageTransHuge(page)) { + __mod_lruvec_state(from_vec, NR_ANON_THPS, + -nr_pages); + __mod_lruvec_state(to_vec, NR_ANON_THPS, + nr_pages); + } - /* - * move_lock grabbed above and caller set from->moving_account, so - * mod_memcg_page_state will serialize updates to PageDirty. - * So mapping should be stable for dirty pages. 
- */ - if (!anon && PageDirty(page)) { - struct address_space *mapping = page_mapping(page); + } + } else { + __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); + __mod_lruvec_state(to_vec, NR_FILE_PAGES, nr_pages); + + if (PageSwapBacked(page)) { + __mod_lruvec_state(from_vec, NR_SHMEM, -nr_pages); + __mod_lruvec_state(to_vec, NR_SHMEM, nr_pages); + } + + if (page_mapped(page)) { + __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); + __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); + } + + if (PageDirty(page)) { + struct address_space *mapping = page_mapping(page); - if (mapping_cap_account_dirty(mapping)) { - __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages); - __mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages); + if (mapping_cap_account_dirty(mapping)) { + __mod_lruvec_state(from_vec, NR_FILE_DIRTY, + -nr_pages); + __mod_lruvec_state(to_vec, NR_FILE_DIRTY, + nr_pages); + } } } @@ -5485,22 +5432,30 @@ static int mem_cgroup_move_account(struct page *page, } /* + * All state has been migrated, let's switch to the new memcg. + * * It is safe to change page->mem_cgroup here because the page - * is referenced, charged, and isolated - we can't race with - * uncharging, charging, migration, or LRU putback. + * is referenced, charged, isolated, and locked: we can't race + * with (un)charging, migration, LRU putback, or anything else + * that would rely on a stable page->mem_cgroup. + * + * Note that lock_page_memcg is a memcg lock, not a page lock, + * to save space. As soon as we switch page->mem_cgroup to a + * new memcg that isn't locked, the above state can change + * concurrently again. Make sure we're truly done with it. 
*/ + smp_mb(); - /* caller should have done css_get */ - page->mem_cgroup = to; + page->mem_cgroup = to; /* caller should have done css_get */ - spin_unlock_irqrestore(&from->move_lock, flags); + __unlock_page_memcg(from); ret = 0; local_irq_disable(); - mem_cgroup_charge_statistics(to, page, compound, nr_pages); + mem_cgroup_charge_statistics(to, page, nr_pages); memcg_check_events(to, page); - mem_cgroup_charge_statistics(from, page, compound, -nr_pages); + mem_cgroup_charge_statistics(from, page, -nr_pages); memcg_check_events(from, page); local_irq_enable(); out_unlock: @@ -6486,125 +6441,63 @@ out: } /** - * mem_cgroup_try_charge - try charging a page + * mem_cgroup_charge - charge a newly allocated page to a cgroup * @page: page to charge * @mm: mm context of the victim * @gfp_mask: reclaim mode - * @memcgp: charged memcg return - * @compound: charge the page as compound or small page * * Try to charge @page to the memcg that @mm belongs to, reclaiming * pages according to @gfp_mask if necessary. * - * Returns 0 on success, with *@memcgp pointing to the charged memcg. - * Otherwise, an error code is returned. - * - * After page->mapping has been set up, the caller must finalize the - * charge with mem_cgroup_commit_charge(). Or abort the transaction - * with mem_cgroup_cancel_charge() in case page instantiation fails. + * Returns 0 on success. Otherwise, an error code is returned. */ -int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcgp, - bool compound) +int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { + unsigned int nr_pages = hpage_nr_pages(page); struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = compound ? 
hpage_nr_pages(page) : 1; int ret = 0; if (mem_cgroup_disabled()) goto out; if (PageSwapCache(page)) { + swp_entry_t ent = { .val = page_private(page), }; + unsigned short id; + /* * Every swap fault against a single page tries to charge the * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. The USED bit is protected by - * the page lock, which serializes swap cache removal, which + * already charged pages, too. page->mem_cgroup is protected + * by the page lock, which serializes swap cache removal, which * in turn serializes uncharging. */ VM_BUG_ON_PAGE(!PageLocked(page), page); if (compound_head(page)->mem_cgroup) goto out; - if (do_swap_account) { - swp_entry_t ent = { .val = page_private(page), }; - unsigned short id = lookup_swap_cgroup_id(ent); - - rcu_read_lock(); - memcg = mem_cgroup_from_id(id); - if (memcg && !css_tryget_online(&memcg->css)) - memcg = NULL; - rcu_read_unlock(); - } + id = lookup_swap_cgroup_id(ent); + rcu_read_lock(); + memcg = mem_cgroup_from_id(id); + if (memcg && !css_tryget_online(&memcg->css)) + memcg = NULL; + rcu_read_unlock(); } if (!memcg) memcg = get_mem_cgroup_from_mm(mm); ret = try_charge(memcg, gfp_mask, nr_pages); + if (ret) + goto out_put; - css_put(&memcg->css); -out: - *memcgp = memcg; - return ret; -} - -int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcgp, - bool compound) -{ - struct mem_cgroup *memcg; - int ret; - - ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound); - memcg = *memcgp; - mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask); - return ret; -} - -/** - * mem_cgroup_commit_charge - commit a page charge - * @page: page to charge - * @memcg: memcg to charge the page to - * @lrucare: page might be on LRU already - * @compound: charge the page as compound or small page - * - * Finalize a charge transaction started by mem_cgroup_try_charge(), - * after page->mapping has been set 
up. This must happen atomically - * as part of the page instantiation, i.e. under the page table lock - * for anonymous pages, under the page lock for page and swap cache. - * - * In addition, the page must not be on the LRU during the commit, to - * prevent racing with task migration. If it might be, use @lrucare. - * - * Use mem_cgroup_cancel_charge() to cancel the transaction instead. - */ -void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, - bool lrucare, bool compound) -{ - unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1; - - VM_BUG_ON_PAGE(!page->mapping, page); - VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page); - - if (mem_cgroup_disabled()) - return; - /* - * Swap faults will attempt to charge the same page multiple - * times. But reuse_swap_page() might have removed the page - * from swapcache already, so we can't check PageSwapCache(). - */ - if (!memcg) - return; - - commit_charge(page, memcg, lrucare); + commit_charge(page, memcg); local_irq_disable(); - mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); + mem_cgroup_charge_statistics(memcg, page, nr_pages); memcg_check_events(memcg, page); local_irq_enable(); - if (do_memsw_account() && PageSwapCache(page)) { + if (PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; /* * The swap entry might not get freed for a long time, @@ -6613,42 +6506,18 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, */ mem_cgroup_uncharge_swap(entry, nr_pages); } -} -/** - * mem_cgroup_cancel_charge - cancel a page charge - * @page: page to charge - * @memcg: memcg to charge the page to - * @compound: charge the page as compound or small page - * - * Cancel a charge transaction started by mem_cgroup_try_charge(). - */ -void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, - bool compound) -{ - unsigned int nr_pages = compound ? 
hpage_nr_pages(page) : 1; - - if (mem_cgroup_disabled()) - return; - /* - * Swap faults will attempt to charge the same page multiple - * times. But reuse_swap_page() might have removed the page - * from swapcache already, so we can't check PageSwapCache(). - */ - if (!memcg) - return; - - cancel_charge(memcg, nr_pages); +out_put: + css_put(&memcg->css); +out: + return ret; } struct uncharge_gather { struct mem_cgroup *memcg; + unsigned long nr_pages; unsigned long pgpgout; - unsigned long nr_anon; - unsigned long nr_file; unsigned long nr_kmem; - unsigned long nr_huge; - unsigned long nr_shmem; struct page *dummy_page; }; @@ -6659,37 +6528,32 @@ static inline void uncharge_gather_clear(struct uncharge_gather *ug) static void uncharge_batch(const struct uncharge_gather *ug) { - unsigned long nr_pages = ug->nr_anon + ug->nr_file + ug->nr_kmem; unsigned long flags; if (!mem_cgroup_is_root(ug->memcg)) { - page_counter_uncharge(&ug->memcg->memory, nr_pages); + page_counter_uncharge(&ug->memcg->memory, ug->nr_pages); if (do_memsw_account()) - page_counter_uncharge(&ug->memcg->memsw, nr_pages); + page_counter_uncharge(&ug->memcg->memsw, ug->nr_pages); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem) page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem); memcg_oom_recover(ug->memcg); } local_irq_save(flags); - __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon); - __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file); - __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); - __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); - __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, nr_pages); + __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages); memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); if (!mem_cgroup_is_root(ug->memcg)) - css_put_many(&ug->memcg->css, nr_pages); + css_put_many(&ug->memcg->css, ug->nr_pages); } static void 
uncharge_page(struct page *page, struct uncharge_gather *ug) { + unsigned long nr_pages; + VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) && - !PageHWPoison(page) , page); if (!page->mem_cgroup) return; @@ -6708,23 +6572,13 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) ug->memcg = page->mem_cgroup; } - if (!PageKmemcg(page)) { - unsigned int nr_pages = 1; + nr_pages = compound_nr(page); + ug->nr_pages += nr_pages; - if (PageTransHuge(page)) { - nr_pages = compound_nr(page); - ug->nr_huge += nr_pages; - } - if (PageAnon(page)) - ug->nr_anon += nr_pages; - else { - ug->nr_file += nr_pages; - if (PageSwapBacked(page)) - ug->nr_shmem += nr_pages; - } + if (!PageKmemcg(page)) { ug->pgpgout++; } else { - ug->nr_kmem += compound_nr(page); + ug->nr_kmem += nr_pages; __ClearPageKmemcg(page); } @@ -6761,8 +6615,7 @@ static void uncharge_list(struct list_head *page_list) * mem_cgroup_uncharge - uncharge a page * @page: page to uncharge * - * Uncharge a page previously charged with mem_cgroup_try_charge() and - * mem_cgroup_commit_charge(). + * Uncharge a page previously charged with mem_cgroup_charge(). */ void mem_cgroup_uncharge(struct page *page) { @@ -6785,7 +6638,7 @@ void mem_cgroup_uncharge(struct page *page) * @page_list: list of pages to uncharge * * Uncharge a list of pages previously charged with - * mem_cgroup_try_charge() and mem_cgroup_commit_charge(). + * mem_cgroup_charge(). 
*/ void mem_cgroup_uncharge_list(struct list_head *page_list) { @@ -6838,11 +6691,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) page_counter_charge(&memcg->memsw, nr_pages); css_get_many(&memcg->css, nr_pages); - commit_charge(newpage, memcg, false); + commit_charge(newpage, memcg); local_irq_save(flags); - mem_cgroup_charge_statistics(memcg, newpage, PageTransHuge(newpage), - nr_pages); + mem_cgroup_charge_statistics(memcg, newpage, nr_pages); memcg_check_events(memcg, newpage); local_irq_restore(flags); } @@ -7030,7 +6882,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); - if (!do_memsw_account()) + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return; memcg = page->mem_cgroup; @@ -7059,7 +6911,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memory, nr_entries); - if (memcg != swap_memcg) { + if (!cgroup_memory_noswap && memcg != swap_memcg) { if (!mem_cgroup_is_root(swap_memcg)) page_counter_charge(&swap_memcg->memsw, nr_entries); page_counter_uncharge(&memcg->memsw, nr_entries); @@ -7072,8 +6924,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * only synchronisation we have for updating the per-CPU variables. 
*/ VM_BUG_ON(!irqs_disabled()); - mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page), - -nr_entries); + mem_cgroup_charge_statistics(memcg, page, -nr_entries); memcg_check_events(memcg, page); if (!mem_cgroup_is_root(memcg)) @@ -7096,7 +6947,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) struct mem_cgroup *memcg; unsigned short oldid; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account) + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; memcg = page->mem_cgroup; @@ -7112,7 +6963,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) memcg = mem_cgroup_id_get_online(memcg); - if (!mem_cgroup_is_root(memcg) && + if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg) && !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { memcg_memory_event(memcg, MEMCG_SWAP_MAX); memcg_memory_event(memcg, MEMCG_SWAP_FAIL); @@ -7140,14 +6991,11 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) struct mem_cgroup *memcg; unsigned short id; - if (!do_swap_account) - return; - id = swap_cgroup_record(entry, 0, nr_pages); rcu_read_lock(); memcg = mem_cgroup_from_id(id); if (memcg) { - if (!mem_cgroup_is_root(memcg)) { + if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg)) { if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) page_counter_uncharge(&memcg->swap, nr_pages); else @@ -7163,7 +7011,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { long nr_swap_pages = get_nr_swap_pages(); - if (!do_swap_account || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) return nr_swap_pages; for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) nr_swap_pages = min_t(long, nr_swap_pages, @@ -7180,7 +7028,7 @@ bool mem_cgroup_swap_full(struct page *page) if (vm_swap_full()) return true; - if (!do_swap_account || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (cgroup_memory_noswap || 
!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return false; memcg = page->mem_cgroup; @@ -7198,22 +7046,15 @@ bool mem_cgroup_swap_full(struct page *page) return false; } -/* for remember boot option*/ -#ifdef CONFIG_MEMCG_SWAP_ENABLED -static int really_do_swap_account __initdata = 1; -#else -static int really_do_swap_account __initdata; -#endif - -static int __init enable_swap_account(char *s) +static int __init setup_swap_account(char *s) { if (!strcmp(s, "1")) - really_do_swap_account = 1; + cgroup_memory_noswap = 0; else if (!strcmp(s, "0")) - really_do_swap_account = 0; + cgroup_memory_noswap = 1; return 1; } -__setup("swapaccount=", enable_swap_account); +__setup("swapaccount=", setup_swap_account); static u64 swap_current_read(struct cgroup_subsys_state *css, struct cftype *cft) @@ -7310,7 +7151,7 @@ static struct cftype swap_files[] = { { } /* terminate */ }; -static struct cftype memsw_cgroup_files[] = { +static struct cftype memsw_files[] = { { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), @@ -7339,13 +7180,16 @@ static struct cftype memsw_cgroup_files[] = { static int __init mem_cgroup_swap_init(void) { - if (!mem_cgroup_disabled() && really_do_swap_account) { - do_swap_account = 1; - WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, - swap_files)); - WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, - memsw_cgroup_files)); - } + /* No memory control -> no swap control */ + if (mem_cgroup_disabled()) + cgroup_memory_noswap = true; + + if (cgroup_memory_noswap) + return 0; + + WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_files)); + return 0; } subsys_initcall(mem_cgroup_swap_init); |