summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-06-02 06:49:42 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 19:59:09 +0200
commit8a5dbc657e1233c907f84f2c6192d3a3cf0026b3 (patch)
tree869fcf3c06900baeda8fcc45f4dca0213250e81e
parentmemcg: expose root cgroup's memory.stat (diff)
downloadlinux-8a5dbc657e1233c907f84f2c6192d3a3cf0026b3.tar.xz
linux-8a5dbc657e1233c907f84f2c6192d3a3cf0026b3.zip
mm/memcg: prepare for swap over-high accounting and penalty calculation
Patch series "memcg: Slow down swap allocation as the available space gets depleted", v6. Tejun describes the problem as follows: When swap runs out, there's an abrupt change in system behavior - the anonymous memory suddenly becomes unmanageable which readily breaks any sort of memory isolation and can bring down the whole system. To avoid that, oomd [1] monitors free swap space and triggers kills when it drops below the specific threshold (e.g. 15%). While this works, it's far from ideal: - Depending on IO performance and total swap size, a given headroom might not be enough or too much. - oomd has to monitor swap depletion in addition to the usual pressure metrics and it currently doesn't consider memory.swap.max. Solve this by adapting parts of the approach that memory.high uses - slow down allocation as the resource gets depleted turning the depletion behavior from abrupt cliff one to gradual degradation observable through memory pressure metric. [1] https://github.com/facebookincubator/oomd This patch (of 4): Slice the memory overage calculation logic a little bit so we can reuse it to apply a similar penalty to the swap. The logic which accesses the memory-specific fields (use and high values) has to be taken out of calculate_high_delay(). Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Shakeel Butt <shakeelb@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Hugh Dickins <hughd@google.com> Cc: Chris Down <chris@chrisdown.name> Cc: Michal Hocko <mhocko@kernel.org> Cc: Tejun Heo <tj@kernel.org> Link: http://lkml.kernel.org/r/20200527195846.102707-1-kuba@kernel.org Link: http://lkml.kernel.org/r/20200527195846.102707-2-kuba@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memcontrol.c62
1 files changed, 35 insertions, 27 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5bbd920398e..b0ac90dc3bb0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_PRECISION_SHIFT 20
#define MEMCG_DELAY_SCALING_SHIFT 14
-/*
- * Get the number of jiffies that we should penalise a mischievous cgroup which
- * is exceeding its memory.high by checking both it and its ancestors.
- */
-static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
- unsigned int nr_pages)
+static u64 calculate_overage(unsigned long usage, unsigned long high)
{
- unsigned long penalty_jiffies;
- u64 max_overage = 0;
-
- do {
- unsigned long usage, high;
- u64 overage;
+ u64 overage;
- usage = page_counter_read(&memcg->memory);
- high = READ_ONCE(memcg->high);
+ if (usage <= high)
+ return 0;
- if (usage <= high)
- continue;
+ /*
+ * Prevent division by 0 in overage calculation by acting as if
+ * it was a threshold of 1 page
+ */
+ high = max(high, 1UL);
- /*
- * Prevent division by 0 in overage calculation by acting as if
- * it was a threshold of 1 page
- */
- high = max(high, 1UL);
+ overage = usage - high;
+ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+ return div64_u64(overage, high);
+}
- overage = usage - high;
- overage <<= MEMCG_DELAY_PRECISION_SHIFT;
- overage = div64_u64(overage, high);
+static u64 mem_find_max_overage(struct mem_cgroup *memcg)
+{
+ u64 overage, max_overage = 0;
- if (overage > max_overage)
- max_overage = overage;
+ do {
+ overage = calculate_overage(page_counter_read(&memcg->memory),
+ READ_ONCE(memcg->high));
+ max_overage = max(overage, max_overage);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
+ return max_overage;
+}
+
+/*
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+ unsigned int nr_pages,
+ u64 max_overage)
+{
+ unsigned long penalty_jiffies;
+
if (!max_overage)
return 0;
@@ -2411,7 +2418,8 @@ void mem_cgroup_handle_over_high(void)
* memory.high is breached and reclaim is unable to keep up. Throttle
* allocators proactively to slow down excessive growth.
*/
- penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+ penalty_jiffies = calculate_high_delay(memcg, nr_pages,
+ mem_find_max_overage(memcg));
/*
* Don't sleep if the amount of jiffies this memcg owes us is so low