Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 318 |
1 file changed, 104 insertions, 214 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 085dc6d2f876..23976fd885fd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -292,6 +292,9 @@ struct mem_cgroup {
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
 
+	/* css_online() has been completed */
+	int initialized;
+
 	/*
 	 * the counter to account for mem+swap usage.
 	 */
@@ -315,9 +318,6 @@ struct mem_cgroup {
 	/* OOM-Killer disable */
 	int oom_kill_disable;
 
-	/* set when res.limit == memsw.limit */
-	bool memsw_is_minimum;
-
 	/* protect arrays of thresholds */
 	struct mutex thresholds_lock;
 
@@ -481,14 +481,6 @@ enum res_type {
 #define OOM_CONTROL		(0)
 
 /*
- * Reclaim flags for mem_cgroup_hierarchical_reclaim
- */
-#define MEM_CGROUP_RECLAIM_NOSWAP_BIT	0x0
-#define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
-#define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
-#define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-
-/*
  * The memcg_create_mutex will be held whenever a new cgroup is created.
  * As a consequence, any change that needs to protect against new child cgroups
  * appearing has to hold it as well.
@@ -646,11 +638,13 @@ int memcg_limited_groups_array_size;
 struct static_key memcg_kmem_enabled_key;
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
+static void memcg_free_cache_id(int id);
+
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
 	if (memcg_kmem_is_active(memcg)) {
 		static_key_slow_dec(&memcg_kmem_enabled_key);
-		ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
+		memcg_free_cache_id(memcg->kmemcg_id);
 	}
 	/*
 	 * This check can't live in kmem destruction function,
@@ -1099,10 +1093,21 @@ skip_node:
 	 * skipping css reference should be safe.
 	 */
 	if (next_css) {
-		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) &&
-		     css_tryget_online(next_css)))
-			return mem_cgroup_from_css(next_css);
+		struct mem_cgroup *memcg = mem_cgroup_from_css(next_css);
+
+		if (next_css == &root->css)
+			return memcg;
+
+		if (css_tryget_online(next_css)) {
+			/*
+			 * Make sure the memcg is initialized:
+			 * mem_cgroup_css_online() orders the
+			 * initialization against setting the flag.
+			 */
+			if (smp_load_acquire(&memcg->initialized))
+				return memcg;
+			css_put(next_css);
+		}
 
 		prev_css = next_css;
 		goto skip_node;
@@ -1792,42 +1797,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			 NULL, "Memory cgroup out of memory");
 }
 
-static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
-					gfp_t gfp_mask,
-					unsigned long flags)
-{
-	unsigned long total = 0;
-	bool noswap = false;
-	int loop;
-
-	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
-		noswap = true;
-	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
-		noswap = true;
-
-	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
-		if (loop)
-			drain_all_stock_async(memcg);
-		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
-		/*
-		 * Allow limit shrinkers, which are triggered directly
-		 * by userspace, to catch signals and stop reclaim
-		 * after minimal progress, regardless of the margin.
-		 */
-		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
-			break;
-		if (mem_cgroup_margin(memcg))
-			break;
-		/*
-		 * If nothing was reclaimed after two attempts, there
-		 * may be no reclaimable pages in this hierarchy.
-		 */
-		if (loop && !total)
-			break;
-	}
-	return total;
-}
-
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -2530,8 +2499,9 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	struct mem_cgroup *mem_over_limit;
 	struct res_counter *fail_res;
 	unsigned long nr_reclaimed;
-	unsigned long flags = 0;
 	unsigned long long size;
+	bool may_swap = true;
+	bool drained = false;
 	int ret = 0;
 
 	if (mem_cgroup_is_root(memcg))
@@ -2541,16 +2511,17 @@ retry:
 		goto done;
 
 	size = batch * PAGE_SIZE;
-	if (!res_counter_charge(&memcg->res, size, &fail_res)) {
-		if (!do_swap_account)
-			goto done_restock;
-		if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+	if (!do_swap_account ||
+	    !res_counter_charge(&memcg->memsw, size, &fail_res)) {
+		if (!res_counter_charge(&memcg->res, size, &fail_res))
 			goto done_restock;
-		res_counter_uncharge(&memcg->res, size);
-		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
-		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
-	} else
+		if (do_swap_account)
+			res_counter_uncharge(&memcg->memsw, size);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+	} else {
+		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
+		may_swap = false;
+	}
 
 	if (batch > nr_pages) {
 		batch = nr_pages;
@@ -2574,11 +2545,18 @@ retry:
 	if (!(gfp_mask & __GFP_WAIT))
 		goto nomem;
 
-	nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
+	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
+						    gfp_mask, may_swap);
 
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		goto retry;
 
+	if (!drained) {
+		drain_all_stock_async(mem_over_limit);
+		drained = true;
+		goto retry;
+	}
+
 	if (gfp_mask & __GFP_NORETRY)
 		goto nomem;
 	/*
@@ -2784,12 +2762,6 @@ static DEFINE_MUTEX(memcg_slab_mutex);
 
 static DEFINE_MUTEX(activate_kmem_mutex);
 
-static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
-{
-	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
-		memcg_kmem_is_active(memcg);
-}
-
 /*
  * This is a bit cumbersome, but it is rarely used and avoids a backpointer
  * in the memcg_cache_params struct.
@@ -2809,7 +2781,7 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 	struct memcg_cache_params *params;
 
-	if (!memcg_can_account_kmem(memcg))
+	if (!memcg_kmem_is_active(memcg))
 		return -EIO;
 
 	print_slabinfo_header(m);
@@ -2892,19 +2864,44 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-static size_t memcg_caches_array_size(int num_groups)
+static int memcg_alloc_cache_id(void)
 {
-	ssize_t size;
-	if (num_groups <= 0)
-		return 0;
+	int id, size;
+	int err;
+
+	id = ida_simple_get(&kmem_limited_groups,
+			    0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (id < 0)
+		return id;
 
-	size = 2 * num_groups;
+	if (id < memcg_limited_groups_array_size)
+		return id;
+
+	/*
+	 * There's no space for the new id in memcg_caches arrays,
+	 * so we have to grow them.
+	 */
+
+	size = 2 * (id + 1);
 	if (size < MEMCG_CACHES_MIN_SIZE)
 		size = MEMCG_CACHES_MIN_SIZE;
 	else if (size > MEMCG_CACHES_MAX_SIZE)
 		size = MEMCG_CACHES_MAX_SIZE;
 
-	return size;
+	mutex_lock(&memcg_slab_mutex);
+	err = memcg_update_all_caches(size);
+	mutex_unlock(&memcg_slab_mutex);
+
+	if (err) {
+		ida_simple_remove(&kmem_limited_groups, id);
+		return err;
+	}
+	return id;
+}
+
+static void memcg_free_cache_id(int id)
+{
+	ida_simple_remove(&kmem_limited_groups, id);
 }
 
 /*
@@ -2914,97 +2911,7 @@ static size_t memcg_caches_array_size(int num_groups)
  */
 void memcg_update_array_size(int num)
 {
-	if (num > memcg_limited_groups_array_size)
-		memcg_limited_groups_array_size = memcg_caches_array_size(num);
-}
-
-int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
-{
-	struct memcg_cache_params *cur_params = s->memcg_params;
-
-	VM_BUG_ON(!is_root_cache(s));
-
-	if (num_groups > memcg_limited_groups_array_size) {
-		int i;
-		struct memcg_cache_params *new_params;
-		ssize_t size = memcg_caches_array_size(num_groups);
-
-		size *= sizeof(void *);
-		size += offsetof(struct memcg_cache_params, memcg_caches);
-
-		new_params = kzalloc(size, GFP_KERNEL);
-		if (!new_params)
-			return -ENOMEM;
-
-		new_params->is_root_cache = true;
-
-		/*
-		 * There is the chance it will be bigger than
-		 * memcg_limited_groups_array_size, if we failed an allocation
-		 * in a cache, in which case all caches updated before it, will
-		 * have a bigger array.
-		 *
-		 * But if that is the case, the data after
-		 * memcg_limited_groups_array_size is certainly unused
-		 */
-		for (i = 0; i < memcg_limited_groups_array_size; i++) {
-			if (!cur_params->memcg_caches[i])
-				continue;
-			new_params->memcg_caches[i] =
-				cur_params->memcg_caches[i];
-		}
-
-		/*
-		 * Ideally, we would wait until all caches succeed, and only
-		 * then free the old one. But this is not worth the extra
-		 * pointer per-cache we'd have to have for this.
-		 *
-		 * It is not a big deal if some caches are left with a size
-		 * bigger than the others. And all updates will reset this
-		 * anyway.
-		 */
-		rcu_assign_pointer(s->memcg_params, new_params);
-		if (cur_params)
-			kfree_rcu(cur_params, rcu_head);
-	}
-	return 0;
-}
-
-int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
-			     struct kmem_cache *root_cache)
-{
-	size_t size;
-
-	if (!memcg_kmem_enabled())
-		return 0;
-
-	if (!memcg) {
-		size = offsetof(struct memcg_cache_params, memcg_caches);
-		size += memcg_limited_groups_array_size * sizeof(void *);
-	} else
-		size = sizeof(struct memcg_cache_params);
-
-	s->memcg_params = kzalloc(size, GFP_KERNEL);
-	if (!s->memcg_params)
-		return -ENOMEM;
-
-	if (memcg) {
-		s->memcg_params->memcg = memcg;
-		s->memcg_params->root_cache = root_cache;
-		css_get(&memcg->css);
-	} else
-		s->memcg_params->is_root_cache = true;
-
-	return 0;
-}
-
-void memcg_free_cache_params(struct kmem_cache *s)
-{
-	if (!s->memcg_params)
-		return;
-	if (!s->memcg_params->is_root_cache)
-		css_put(&s->memcg_params->memcg->css);
-	kfree(s->memcg_params);
+	memcg_limited_groups_array_size = num;
 }
 
 static void memcg_register_cache(struct mem_cgroup *memcg,
@@ -3037,6 +2944,7 @@ static void memcg_register_cache(struct mem_cgroup *memcg,
 	if (!cachep)
 		return;
 
+	css_get(&memcg->css);
 	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
 	/*
@@ -3070,6 +2978,9 @@ static void memcg_unregister_cache(struct kmem_cache *cachep)
 	list_del(&cachep->memcg_params->list);
 
 	kmem_cache_destroy(cachep);
+
+	/* drop the reference taken in memcg_register_cache */
+	css_put(&memcg->css);
 }
 
 /*
@@ -3247,7 +3158,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
 
-	if (!memcg_can_account_kmem(memcg))
+	if (!memcg_kmem_is_active(memcg))
 		goto out;
 
 	memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
@@ -3332,7 +3243,7 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
 
 	memcg = get_mem_cgroup_from_mm(current->mm);
 
-	if (!memcg_can_account_kmem(memcg)) {
+	if (!memcg_kmem_is_active(memcg)) {
 		css_put(&memcg->css);
 		return true;
 	}
@@ -3674,7 +3585,6 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				   unsigned long long val)
 {
 	int retry_count;
-	u64 memswlimit, memlimit;
 	int ret = 0;
 	int children = mem_cgroup_count_children(memcg);
 	u64 curusage, oldusage;
@@ -3701,31 +3611,23 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
 		 */
 		mutex_lock(&set_limit_mutex);
-		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
-		if (memswlimit < val) {
+		if (res_counter_read_u64(&memcg->memsw, RES_LIMIT) < val) {
 			ret = -EINVAL;
 			mutex_unlock(&set_limit_mutex);
 			break;
 		}
-		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
-		if (memlimit < val)
+		if (res_counter_read_u64(&memcg->res, RES_LIMIT) < val)
 			enlarge = 1;
 		ret = res_counter_set_limit(&memcg->res, val);
-		if (!ret) {
-			if (memswlimit == val)
-				memcg->memsw_is_minimum = true;
-			else
-				memcg->memsw_is_minimum = false;
-		}
 		mutex_unlock(&set_limit_mutex);
 
 		if (!ret)
 			break;
 
-		mem_cgroup_reclaim(memcg, GFP_KERNEL,
-				   MEM_CGROUP_RECLAIM_SHRINK);
+		try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true);
+
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3743,7 +3645,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 					 unsigned long long val)
 {
 	int retry_count;
-	u64 memlimit, memswlimit, oldusage, curusage;
+	u64 oldusage, curusage;
 	int children = mem_cgroup_count_children(memcg);
 	int ret = -EBUSY;
 	int enlarge = 0;
@@ -3762,30 +3664,21 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
 		 */
 		mutex_lock(&set_limit_mutex);
-		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
-		if (memlimit > val) {
+		if (res_counter_read_u64(&memcg->res, RES_LIMIT) > val) {
 			ret = -EINVAL;
 			mutex_unlock(&set_limit_mutex);
 			break;
 		}
-		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
-		if (memswlimit < val)
+		if (res_counter_read_u64(&memcg->memsw, RES_LIMIT) < val)
 			enlarge = 1;
 		ret = res_counter_set_limit(&memcg->memsw, val);
-		if (!ret) {
-			if (memlimit == val)
-				memcg->memsw_is_minimum = true;
-			else
-				memcg->memsw_is_minimum = false;
-		}
 		mutex_unlock(&set_limit_mutex);
 
 		if (!ret)
			break;
 
-		mem_cgroup_reclaim(memcg, GFP_KERNEL,
-				   MEM_CGROUP_RECLAIM_NOSWAP |
-				   MEM_CGROUP_RECLAIM_SHRINK);
+		try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, false);
+
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -4034,8 +3927,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 		if (signal_pending(current))
 			return -EINTR;
 
-		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
-						false);
+		progress = try_to_free_mem_cgroup_pages(memcg, 1,
+							GFP_KERNEL, true);
 		if (!progress) {
 			nr_retries--;
 			/* maybe some writeback is necessary */
@@ -4200,23 +4093,12 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
 	if (err)
 		goto out;
 
-	memcg_id = ida_simple_get(&kmem_limited_groups,
-				  0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	memcg_id = memcg_alloc_cache_id();
 	if (memcg_id < 0) {
 		err = memcg_id;
 		goto out;
 	}
 
-	/*
-	 * Make sure we have enough space for this cgroup in each root cache's
-	 * memcg_params.
-	 */
-	mutex_lock(&memcg_slab_mutex);
-	err = memcg_update_all_caches(memcg_id + 1);
-	mutex_unlock(&memcg_slab_mutex);
-	if (err)
-		goto out_rmid;
-
 	memcg->kmemcg_id = memcg_id;
 	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
@@ -4237,10 +4119,6 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
 out:
 	memcg_resume_kmem_account();
 	return err;
-
-out_rmid:
-	ida_simple_remove(&kmem_limited_groups, memcg_id);
-	goto out;
 }
 
 static int memcg_activate_kmem(struct mem_cgroup *memcg,
@@ -5549,6 +5427,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+	int ret;
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -5585,7 +5464,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	}
 	mutex_unlock(&memcg_create_mutex);
 
-	return memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	ret = memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	if (ret)
+		return ret;
+
+	/*
+	 * Make sure the memcg is initialized: mem_cgroup_iter()
+	 * orders reading memcg->initialized against its callers
+	 * reading the memcg members.
+	 */
+	smp_store_release(&memcg->initialized, 1);
+
+	return 0;
 }
 
 /*
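
A note on the synchronization pattern in the hunks touching mem_cgroup_css_online() and mem_cgroup_iter(): the writer finishes initializing the memcg and only then sets memcg->initialized with smp_store_release(), while the iterator checks the flag with smp_load_acquire() before using the group, so a reader that observes the flag is guaranteed to also observe the completed initialization. The kernel primitives are not available in userspace; the sketch below is a minimal stand-alone illustration of the same release/acquire publication idiom using C11 atomics. The struct, field, and function names are invented for the example and are not taken from the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct object {
	int data;                /* ordinary field, written before publication */
	atomic_int initialized;  /* publication flag, 0 until fully set up */
};

static struct object obj;

/* Writer side, analogous to mem_cgroup_css_online(): finish all
 * initialization first, then publish with release semantics so the
 * stores above cannot be reordered past the flag. */
static void *publisher(void *arg)
{
	(void)arg;
	obj.data = 42;
	atomic_store_explicit(&obj.initialized, 1, memory_order_release);
	return NULL;
}

/* Reader side, analogous to mem_cgroup_iter(): acquire-load the flag
 * and only touch the object's fields if it has been published. */
static void *consumer(void *arg)
{
	(void)arg;
	if (atomic_load_explicit(&obj.initialized, memory_order_acquire))
		printf("initialized, data=%d\n", obj.data);  /* guaranteed 42 */
	else
		printf("not initialized yet, skipping\n");
	return NULL;
}

int main(void)
{
	pthread_t w, r;

	pthread_create(&w, NULL, publisher, NULL);
	pthread_create(&r, NULL, consumer, NULL);
	pthread_join(w, NULL);
	pthread_join(r, NULL);
	return 0;
}

Build with something like cc -std=c11 -pthread publish.c (file name is illustrative). The acquire load pairs with the release store, so whenever the consumer sees the flag set it also sees data == 42, never a half-initialized object; if the flag is not yet visible, the consumer simply skips the object, exactly as the patched mem_cgroup_iter() skips and css_put()s a memcg whose css_online() has not completed.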