summaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c175
1 files changed, 83 insertions, 92 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 43bd549cf3f5..0e3a8c11fb3b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6234,6 +6234,76 @@ struct cgroup_subsys memory_cgrp_subsys = {
.early_init = 0,
};
+/*
+ * This function calculates an individual cgroup's effective
+ * protection which is derived from its own memory.min/low, its
+ * parent's and siblings' settings, as well as the actual memory
+ * distribution in the tree.
+ *
+ * The following rules apply to the effective protection values:
+ *
+ * 1. At the first level of reclaim, effective protection is equal to
+ * the declared protection in memory.min and memory.low.
+ *
+ * 2. To enable safe delegation of the protection configuration, at
+ * subsequent levels the effective protection is capped to the
+ * parent's effective protection.
+ *
+ * 3. To make complex and dynamic subtrees easier to configure, the
+ * user is allowed to overcommit the declared protection at a given
+ * level. If that is the case, the parent's effective protection is
+ * distributed to the children in proportion to how much protection
+ * they have declared and how much of it they are utilizing.
+ *
+ * This makes distribution proportional, but also work-conserving:
+ * if one cgroup claims much more protection than it uses memory,
+ * the unused remainder is available to its siblings.
+ *
+ * 4. Conversely, when the declared protection is undercommitted at a
+ * given level, the distribution of the larger parental protection
+ * budget is NOT proportional. A cgroup's protection from a sibling
+ * is capped to its own memory.min/low setting.
+ *
+ */
+static unsigned long effective_protection(unsigned long usage,
+ unsigned long setting,
+ unsigned long parent_effective,
+ unsigned long siblings_protected)
+{
+ unsigned long protected;
+
+ protected = min(usage, setting);
+ /*
+ * If all cgroups at this level combined claim and use more
+ * protection then what the parent affords them, distribute
+ * shares in proportion to utilization.
+ *
+ * We are using actual utilization rather than the statically
+ * claimed protection in order to be work-conserving: claimed
+ * but unused protection is available to siblings that would
+ * otherwise get a smaller chunk than what they claimed.
+ */
+ if (siblings_protected > parent_effective)
+ return protected * parent_effective / siblings_protected;
+
+ /*
+ * Ok, utilized protection of all children is within what the
+ * parent affords them, so we know whatever this child claims
+ * and utilizes is effectively protected.
+ *
+ * If there is unprotected usage beyond this value, reclaim
+ * will apply pressure in proportion to that amount.
+ *
+ * If there is unutilized protection, the cgroup will be fully
+ * shielded from reclaim, but we do return a smaller value for
+ * protection than what the group could enjoy in theory. This
+ * is okay. With the overcommit distribution above, effective
+ * protection is always dependent on how memory is actually
+ * consumed among the siblings anyway.
+ */
+ return protected;
+}
+
/**
* mem_cgroup_protected - check if memory consumption is in the normal range
* @root: the top ancestor of the sub-tree being checked
@@ -6247,67 +6317,11 @@ struct cgroup_subsys memory_cgrp_subsys = {
* MEMCG_PROT_LOW: cgroup memory is protected as long there is
* an unprotected supply of reclaimable memory from other cgroups.
* MEMCG_PROT_MIN: cgroup memory is protected
- *
- * @root is exclusive; it is never protected when looked at directly
- *
- * To provide a proper hierarchical behavior, effective memory.min/low values
- * are used. Below is the description of how effective memory.low is calculated.
- * Effective memory.min values is calculated in the same way.
- *
- * Effective memory.low is always equal or less than the original memory.low.
- * If there is no memory.low overcommittment (which is always true for
- * top-level memory cgroups), these two values are equal.
- * Otherwise, it's a part of parent's effective memory.low,
- * calculated as a cgroup's memory.low usage divided by sum of sibling's
- * memory.low usages, where memory.low usage is the size of actually
- * protected memory.
- *
- * low_usage
- * elow = min( memory.low, parent->elow * ------------------ ),
- * siblings_low_usage
- *
- * low_usage = min(memory.low, memory.current)
- *
- *
- * Such definition of the effective memory.low provides the expected
- * hierarchical behavior: parent's memory.low value is limiting
- * children, unprotected memory is reclaimed first and cgroups,
- * which are not using their guarantee do not affect actual memory
- * distribution.
- *
- * For example, if there are memcgs A, A/B, A/C, A/D and A/E:
- *
- * A A/memory.low = 2G, A/memory.current = 6G
- * //\\
- * BC DE B/memory.low = 3G B/memory.current = 2G
- * C/memory.low = 1G C/memory.current = 2G
- * D/memory.low = 0 D/memory.current = 2G
- * E/memory.low = 10G E/memory.current = 0
- *
- * and the memory pressure is applied, the following memory distribution
- * is expected (approximately):
- *
- * A/memory.current = 2G
- *
- * B/memory.current = 1.3G
- * C/memory.current = 0.6G
- * D/memory.current = 0
- * E/memory.current = 0
- *
- * These calculations require constant tracking of the actual low usages
- * (see propagate_protected_usage()), as well as recursive calculation of
- * effective memory.low values. But as we do call mem_cgroup_protected()
- * path for each memory cgroup top-down from the reclaim,
- * it's possible to optimize this part, and save calculated elow
- * for next usage. This part is intentionally racy, but it's ok,
- * as memory.low is a best-effort mechanism.
*/
enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
struct mem_cgroup *memcg)
{
struct mem_cgroup *parent;
- unsigned long emin, parent_emin;
- unsigned long elow, parent_elow;
unsigned long usage;
if (mem_cgroup_disabled())
@@ -6322,52 +6336,29 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
if (!usage)
return MEMCG_PROT_NONE;
- emin = memcg->memory.min;
- elow = memcg->memory.low;
-
parent = parent_mem_cgroup(memcg);
/* No parent means a non-hierarchical mode on v1 memcg */
if (!parent)
return MEMCG_PROT_NONE;
- if (parent == root)
- goto exit;
-
- parent_emin = READ_ONCE(parent->memory.emin);
- emin = min(emin, parent_emin);
- if (emin && parent_emin) {
- unsigned long min_usage, siblings_min_usage;
-
- min_usage = min(usage, memcg->memory.min);
- siblings_min_usage = atomic_long_read(
- &parent->memory.children_min_usage);
-
- if (min_usage && siblings_min_usage)
- emin = min(emin, parent_emin * min_usage /
- siblings_min_usage);
+ if (parent == root) {
+ memcg->memory.emin = memcg->memory.min;
+ memcg->memory.elow = memcg->memory.low;
+ goto out;
}
- parent_elow = READ_ONCE(parent->memory.elow);
- elow = min(elow, parent_elow);
- if (elow && parent_elow) {
- unsigned long low_usage, siblings_low_usage;
-
- low_usage = min(usage, memcg->memory.low);
- siblings_low_usage = atomic_long_read(
- &parent->memory.children_low_usage);
+ memcg->memory.emin = effective_protection(usage,
+ memcg->memory.min, READ_ONCE(parent->memory.emin),
+ atomic_long_read(&parent->memory.children_min_usage));
- if (low_usage && siblings_low_usage)
- elow = min(elow, parent_elow * low_usage /
- siblings_low_usage);
- }
+ memcg->memory.elow = effective_protection(usage,
+ memcg->memory.low, READ_ONCE(parent->memory.elow),
+ atomic_long_read(&parent->memory.children_low_usage));
-exit:
- memcg->memory.emin = emin;
- memcg->memory.elow = elow;
-
- if (usage <= emin)
+out:
+ if (usage <= memcg->memory.emin)
return MEMCG_PROT_MIN;
- else if (usage <= elow)
+ else if (usage <= memcg->memory.elow)
return MEMCG_PROT_LOW;
else
return MEMCG_PROT_NONE;