diff options
author | Luca Boccassi <luca.boccassi@microsoft.com> | 2021-03-30 20:01:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-30 20:01:01 +0200 |
commit | ae63987fac68b3f0e1d941ed53fe56c50f0d6281 (patch) | |
tree | 07cb779cf3a59768c47037728c96d31d861d1f01 | |
parent | Merge pull request #19155 from keszybz/hwdb-contrib-v248 (diff) | |
parent | oomd: fix iteration over candidates to kill (diff) | |
download | systemd-ae63987fac68b3f0e1d941ed53fe56c50f0d6281.tar.xz systemd-ae63987fac68b3f0e1d941ed53fe56c50f0d6281.zip |
Merge pull request #19149 from anitazha/oomdlogging
oomd: make it more clear when a kill happens
-rw-r--r-- | src/oom/oomd-manager.c | 38 | ||||
-rw-r--r-- | src/oom/oomd-util.c | 72 | ||||
-rw-r--r-- | src/oom/oomd-util.h | 7 |
3 files changed, 85 insertions, 32 deletions
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c index 345f8a77cf..c3e84aadde 100644 --- a/src/oom/oomd-manager.c +++ b/src/oom/oomd-manager.c @@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo OomdCGroupContext *t; SET_FOREACH(t, targets) { - log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity", - t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC); - - r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run); + _cleanup_free_ char *selected = NULL; + char ts[FORMAT_TIMESPAN_MAX]; + + log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity", + t->path, + LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), + LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), + format_timespan(ts, sizeof ts, + m->default_mem_pressure_duration_usec, + USEC_PER_SEC)); + + r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected); if (r == -ENOMEM) return log_oom(); if (r < 0) @@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo else { /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */ m->post_action_delay_start = usec_now; + if (selected) + log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%" + " for > %s with reclaim activity", + selected, t->path, + LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), + LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), + format_timespan(ts, sizeof ts, + m->default_mem_pressure_duration_usec, + USEC_PER_SEC)); return 0; } } @@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo if 
(oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) { _cleanup_hashmap_free_ Hashmap *candidates = NULL; + _cleanup_free_ char *selected = NULL; - log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, - m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); + log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, + m->system_context.swap_used, m->system_context.swap_total, + PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates); if (r == -ENOMEM) @@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo if (r < 0) log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m"); - r = oomd_kill_by_swap_usage(candidates, m->dry_run); + r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected); if (r == -ENOMEM) return log_oom(); if (r < 0) log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m"); else { m->post_action_delay_start = usec_now; + if (selected) + log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than " + PERMYRIAD_AS_PERCENT_FORMAT_STR, + selected, m->system_context.swap_used, m->system_context.swap_total, + PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); return 0; } } diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c index 7860f2154d..894d23a83a 100644 --- a/src/oom/oomd-util.c +++ b/src/oom/oomd-util.c @@ -208,54 +208,82 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { return set_size(pids_killed) != 0; } -int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) { +int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) { _cleanup_free_ 
OomdCGroupContext **sorted = NULL; - int r; + int n, r, ret = 0; assert(h); + assert(ret_selected); - r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted); - if (r < 0) - return r; + n = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted); + if (n < 0) + return n; - for (int i = 0; i < r; i++) { - /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */ - /* Don't break since there might be "avoid" cgroups at the end. */ + for (int i = 0; i < n; i++) { + /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. + * Continue since there might be "avoid" cgroups at the end. */ if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) continue; r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) - break; + if (r == 0) + continue; /* We didn't find anything to kill */ + if (r == -ENOMEM) + return r; /* Treat oom as a hard error */ + if (r < 0) { + if (ret == 0) + ret = r; + continue; /* Try to find something else to kill */ + } + + char *selected = strdup(sorted[i]->path); + if (!selected) + return -ENOMEM; + *ret_selected = selected; + return 1; } - return r; + return ret; } -int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { +int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) { _cleanup_free_ OomdCGroupContext **sorted = NULL; - int r; + int n, r, ret = 0; assert(h); + assert(ret_selected); - r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted); - if (r < 0) - return r; + n = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted); + if (n < 0) + return n; /* Try to kill cgroups with non-zero swap usage until we either succeed in * killing or we get to a cgroup with no swap usage. */ - for (int i = 0; i < r; i++) { - /* Skip over cgroups with no resource usage. Don't break since there might be "avoid" - * cgroups at the end. 
*/ + for (int i = 0; i < n; i++) { + /* Skip over cgroups with no resource usage. + * Continue since there might be "avoid" cgroups at the end. */ if (sorted[i]->swap_usage == 0) continue; r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) - break; + if (r == 0) + continue; /* We didn't find anything to kill */ + if (r == -ENOMEM) + return r; /* Treat oom as a hard error */ + if (r < 0) { + if (ret == 0) + ret = r; + continue; /* Try to find something else to kill */ + } + + char *selected = strdup(sorted[i]->path); + if (!selected) + return -ENOMEM; + *ret_selected = selected; + return 1; } - return r; + return ret; } int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h index 560697a4f4..51423130d1 100644 --- a/src/oom/oomd-util.h +++ b/src/oom/oomd-util.h @@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run); /* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */ /* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise, - * everything in `h` is a candidate. */ -int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run); -int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run); + * everything in `h` is a candidate. + * Returns the killed cgroup in ret_selected. */ +int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected); +int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected); int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret); int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret); |