summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Luca Boccassi <luca.boccassi@microsoft.com> 2021-03-30 20:01:01 +0200
committer: GitHub <noreply@github.com> 2021-03-30 20:01:01 +0200
commit: ae63987fac68b3f0e1d941ed53fe56c50f0d6281 (patch)
tree: 07cb779cf3a59768c47037728c96d31d861d1f01
parent: Merge pull request #19155 from keszybz/hwdb-contrib-v248 (diff)
parent: oomd: fix iteration over candidates to kill (diff)
download: systemd-ae63987fac68b3f0e1d941ed53fe56c50f0d6281.tar.xz
download: systemd-ae63987fac68b3f0e1d941ed53fe56c50f0d6281.zip
Merge pull request #19149 from anitazha/oomdlogging
oomd: make it more clear when a kill happens
-rw-r--r-- src/oom/oomd-manager.c 38
-rw-r--r-- src/oom/oomd-util.c 72
-rw-r--r-- src/oom/oomd-util.h 7
3 files changed, 85 insertions, 32 deletions
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
index 345f8a77cf..c3e84aadde 100644
--- a/src/oom/oomd-manager.c
+++ b/src/oom/oomd-manager.c
@@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
OomdCGroupContext *t;
SET_FOREACH(t, targets) {
- log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
- t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
-
- r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
+ _cleanup_free_ char *selected = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
+ t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
+
+ r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
@@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
else {
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
m->post_action_delay_start = usec_now;
+ if (selected)
+ log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
+ " for > %s with reclaim activity",
+ selected, t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
return 0;
}
}
@@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
+ _cleanup_free_ char *selected = NULL;
- log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
- m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
+ log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
+ m->system_context.swap_used, m->system_context.swap_total,
+ PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
if (r == -ENOMEM)
@@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (r < 0)
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
- r = oomd_kill_by_swap_usage(candidates, m->dry_run);
+ r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
else {
m->post_action_delay_start = usec_now;
+ if (selected)
+ log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than "
+ PERMYRIAD_AS_PERCENT_FORMAT_STR,
+ selected, m->system_context.swap_used, m->system_context.swap_total,
+ PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
return 0;
}
}
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
index 7860f2154d..894d23a83a 100644
--- a/src/oom/oomd-util.c
+++ b/src/oom/oomd-util.c
@@ -208,54 +208,82 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
return set_size(pids_killed) != 0;
}
-int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
- int r;
+ int n, r, ret = 0;
assert(h);
+ assert(ret_selected);
- r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
- if (r < 0)
- return r;
+ n = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
+ if (n < 0)
+ return n;
- for (int i = 0; i < r; i++) {
- /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */
- /* Don't break since there might be "avoid" cgroups at the end. */
+ for (int i = 0; i < n; i++) {
+ /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
+ * Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
- if (r > 0 || r == -ENOMEM)
- break;
+ if (r == 0)
+ continue; /* We didn't find anything to kill */
+ if (r == -ENOMEM)
+ return r; /* Treat oom as a hard error */
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+ continue; /* Try to find something else to kill */
+ }
+
+ char *selected = strdup(sorted[i]->path);
+ if (!selected)
+ return -ENOMEM;
+ *ret_selected = selected;
+ return 1;
}
- return r;
+ return ret;
}
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
- int r;
+ int n, r, ret = 0;
assert(h);
+ assert(ret_selected);
- r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
- if (r < 0)
- return r;
+ n = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
+ if (n < 0)
+ return n;
/* Try to kill cgroups with non-zero swap usage until we either succeed in
* killing or we get to a cgroup with no swap usage. */
- for (int i = 0; i < r; i++) {
- /* Skip over cgroups with no resource usage. Don't break since there might be "avoid"
- * cgroups at the end. */
+ for (int i = 0; i < n; i++) {
+ /* Skip over cgroups with no resource usage.
+ * Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->swap_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
- if (r > 0 || r == -ENOMEM)
- break;
+ if (r == 0)
+ continue; /* We didn't find anything to kill */
+ if (r == -ENOMEM)
+ return r; /* Treat oom as a hard error */
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+ continue; /* Try to find something else to kill */
+ }
+
+ char *selected = strdup(sorted[i]->path);
+ if (!selected)
+ return -ENOMEM;
+ *ret_selected = selected;
+ return 1;
}
- return r;
+ return ret;
}
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h
index 560697a4f4..51423130d1 100644
--- a/src/oom/oomd-util.h
+++ b/src/oom/oomd-util.h
@@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
/* The following oomd_kill_by_* functions return 1 if processes were killed, 0 if nothing was killed, or negative on error. */
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
- * everything in `h` is a candidate. */
-int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
+ * everything in `h` is a candidate.
+ * Returns the killed cgroup in ret_selected. */
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);