diff options
author | Nishal Kulkarni <nishalkulkarni@gmail.com> | 2022-03-13 20:05:18 +0100 |
---|---|---|
committer | Nishal Kulkarni <nishalkulkarni@gmail.com> | 2022-03-22 13:27:59 +0100 |
commit | 38c41427c7ee9a6209b84e7b17b1df5774d8f1ed (patch) | |
tree | d158d3034c88b278b10de3624b46d0ed794b268f /src | |
parent | core/cgroup: Add OOM check (diff) | |
download | systemd-38c41427c7ee9a6209b84e7b17b1df5774d8f1ed.tar.xz systemd-38c41427c7ee9a6209b84e7b17b1df5774d8f1ed.zip |
core/oomd: Use oom-kill ServiceResult for oomd
To notify the user of kill events from systemd-oomd, we now use
`SERVICE_FAILURE_OOM_KILL` as the failure result.
`unit_check_oomd_kill` now calls `notify_cgroup_oom` to
update the service result to `oom-kill`.
We add a new xattr `user.oomd_ooms` to keep track of the OOM kills
initiated by systemd-oomd. This helps us resolve a race between sending
SIGKILL to processes and checking for OOM kill status from the xattr.
Related to: #20649
Diffstat (limited to 'src')
-rw-r--r-- | src/core/cgroup.c | 21 | ||||
-rw-r--r-- | src/core/manager.c | 4 | ||||
-rw-r--r-- | src/core/manager.h | 6 | ||||
-rw-r--r-- | src/core/service.c | 7 | ||||
-rw-r--r-- | src/core/service.h | 2 | ||||
-rw-r--r-- | src/core/unit.c | 7 | ||||
-rw-r--r-- | src/core/unit.h | 6 | ||||
-rw-r--r-- | src/oom/oomd-util.c | 4 |
8 files changed, 42 insertions, 15 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index f3b124eb67..15ab363548 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3041,7 +3041,7 @@ int unit_check_oomd_kill(Unit *u) { else if (r == 0) return 0; - r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value); + r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_ooms", &value); if (r < 0 && r != -ENODATA) return r; @@ -3057,11 +3057,25 @@ int unit_check_oomd_kill(Unit *u) { if (!increased) return 0; + n = 0; + value = mfree(value); + r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value); + if (r >= 0 && !isempty(value)) + (void) safe_atou64(value, &n); + if (n > 0) log_unit_struct(u, LOG_NOTICE, "MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR, LOG_UNIT_INVOCATION_ID(u), - LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n)); + LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n), + "N_PROCESSES=%" PRIu64, n); + else + log_unit_struct(u, LOG_NOTICE, + "MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR, + LOG_UNIT_INVOCATION_ID(u), + LOG_UNIT_MESSAGE(u, "systemd-oomd killed some process(es) in this unit.")); + + unit_notify_cgroup_oom(u, /* ManagedOOM= */ true); return 1; } @@ -3097,8 +3111,7 @@ int unit_check_oom(Unit *u) { LOG_UNIT_INVOCATION_ID(u), LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer.")); - if (UNIT_VTABLE(u)->notify_cgroup_oom) - UNIT_VTABLE(u)->notify_cgroup_oom(u); + unit_notify_cgroup_oom(u, /* ManagedOOM= */ false); return 1; } diff --git a/src/core/manager.c b/src/core/manager.c index a379bbefea..69717e5ba6 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -2644,9 +2644,7 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) { * We only do this for the cgroup the PID belonged to. */ (void) unit_check_oom(u1); - /* This only logs for now. 
In the future when the interface for kills/notifications - * is more stable we can extend service results table similar to how kernel oom kills - * are managed. */ + /* We check if systemd-oomd perfomed a kill so that we log and notify appropriately */ (void) unit_check_oomd_kill(u1); manager_invoke_sigchld_event(m, u1, &si); diff --git a/src/core/manager.h b/src/core/manager.h index f9096cf348..c989ce9c32 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -60,9 +60,9 @@ typedef enum StatusType { } StatusType; typedef enum OOMPolicy { - OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */ - OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */ - OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */ + OOM_CONTINUE, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */ + OOM_STOP, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */ + OOM_KILL, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */ _OOM_POLICY_MAX, _OOM_POLICY_INVALID = -EINVAL, } OOMPolicy; diff --git a/src/core/service.c b/src/core/service.c index 396c27956c..2d7a086852 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -3404,10 +3404,13 @@ static void service_notify_cgroup_empty_event(Unit *u) { } } -static void service_notify_cgroup_oom_event(Unit *u) { +static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) { Service *s = SERVICE(u); - log_unit_debug(u, "Process of control group was killed by the OOM killer."); + if (managed_oom) + log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd."); + else + log_unit_debug(u, "Process of control group was killed by the OOM killer."); if (s->oom_policy == OOM_CONTINUE) return; diff --git a/src/core/service.h b/src/core/service.h index 4116e40d8f..91e02e6d7e 
100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -75,7 +75,7 @@ typedef enum ServiceResult { SERVICE_FAILURE_CORE_DUMP, SERVICE_FAILURE_WATCHDOG, SERVICE_FAILURE_START_LIMIT_HIT, - SERVICE_FAILURE_OOM_KILL, + SERVICE_FAILURE_OOM_KILL, /* OOM Kill by the Kernel or systemd-oomd */ SERVICE_SKIP_CONDITION, _SERVICE_RESULT_MAX, _SERVICE_RESULT_INVALID = -EINVAL, diff --git a/src/core/unit.c b/src/core/unit.c index 69ece07447..42fb4220f6 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3801,6 +3801,13 @@ int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) { return UNIT_VTABLE(u)->kill(u, w, signo, error); } +void unit_notify_cgroup_oom(Unit *u, bool managed_oom) { + assert(u); + + if (UNIT_VTABLE(u)->notify_cgroup_oom) + UNIT_VTABLE(u)->notify_cgroup_oom(u, managed_oom); +} + static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) { _cleanup_set_free_ Set *pid_set = NULL; int r; diff --git a/src/core/unit.h b/src/core/unit.h index 94f2180951..733eeecd7f 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -285,7 +285,7 @@ typedef struct Unit { nsec_t cpu_usage_base; nsec_t cpu_usage_last; /* the most recently read value */ - /* The current counter of processes sent SIGKILL by systemd-oomd */ + /* The current counter of OOM kills initiated by systemd-oomd */ uint64_t managed_oom_kill_last; /* The current counter of the oom_kill field in the memory.events cgroup attribute */ @@ -596,7 +596,7 @@ typedef struct UnitVTable { void (*notify_cgroup_empty)(Unit *u); /* Called whenever an OOM kill event on this unit was seen */ - void (*notify_cgroup_oom)(Unit *u); + void (*notify_cgroup_oom)(Unit *u, bool managed_oom); /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds); @@ -811,6 +811,8 @@ int unit_reload(Unit *u); int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error); int unit_kill_common(Unit *u, KillWho who, int signo, pid_t 
main_pid, pid_t control_pid, sd_bus_error *error); +void unit_notify_cgroup_oom(Unit *u, bool managed_oom); + typedef enum UnitNotifyFlags { UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0, UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1, diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c index 77718d9c9e..a135824c53 100644 --- a/src/oom/oomd-util.c +++ b/src/oom/oomd-util.c @@ -192,6 +192,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { if (!pids_killed) return -ENOMEM; + r = increment_oomd_xattr(path, "user.oomd_ooms", 1); + if (r < 0) + log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m"); + if (recurse) r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL); else |