summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorNishal Kulkarni <nishalkulkarni@gmail.com>2022-03-13 20:05:18 +0100
committerNishal Kulkarni <nishalkulkarni@gmail.com>2022-03-22 13:27:59 +0100
commit38c41427c7ee9a6209b84e7b17b1df5774d8f1ed (patch)
treed158d3034c88b278b10de3624b46d0ed794b268f /src
parentcore/cgroup: Add OOM check (diff)
downloadsystemd-38c41427c7ee9a6209b84e7b17b1df5774d8f1ed.tar.xz
systemd-38c41427c7ee9a6209b84e7b17b1df5774d8f1ed.zip
core/oomd: Use oom-kill ServiceResult for oomd
To notify the user of kill events from systemd-oomd, we now use `SERVICE_FAILURE_OOM_KILL` as the failure result. `unit_check_oomd_kill` now calls `notify_cgroup_oom` to update the service result to `oom-kill`. We add a new xattr, `user.oomd_ooms`, to keep track of the OOM kills initiated by systemd-oomd; this helps us resolve a race between sending SIGKILL to processes and checking for OOM kill status from the xattr. Related to: #20649
Diffstat (limited to 'src')
-rw-r--r--src/core/cgroup.c21
-rw-r--r--src/core/manager.c4
-rw-r--r--src/core/manager.h6
-rw-r--r--src/core/service.c7
-rw-r--r--src/core/service.h2
-rw-r--r--src/core/unit.c7
-rw-r--r--src/core/unit.h6
-rw-r--r--src/oom/oomd-util.c4
8 files changed, 42 insertions, 15 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index f3b124eb67..15ab363548 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -3041,7 +3041,7 @@ int unit_check_oomd_kill(Unit *u) {
else if (r == 0)
return 0;
- r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_ooms", &value);
if (r < 0 && r != -ENODATA)
return r;
@@ -3057,11 +3057,25 @@ int unit_check_oomd_kill(Unit *u) {
if (!increased)
return 0;
+ n = 0;
+ value = mfree(value);
+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
+ if (r >= 0 && !isempty(value))
+ (void) safe_atou64(value, &n);
+
if (n > 0)
log_unit_struct(u, LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
LOG_UNIT_INVOCATION_ID(u),
- LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n));
+ LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n),
+ "N_PROCESSES=%" PRIu64, n);
+ else
+ log_unit_struct(u, LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "systemd-oomd killed some process(es) in this unit."));
+
+ unit_notify_cgroup_oom(u, /* ManagedOOM= */ true);
return 1;
}
@@ -3097,8 +3111,7 @@ int unit_check_oom(Unit *u) {
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer."));
- if (UNIT_VTABLE(u)->notify_cgroup_oom)
- UNIT_VTABLE(u)->notify_cgroup_oom(u);
+ unit_notify_cgroup_oom(u, /* ManagedOOM= */ false);
return 1;
}
diff --git a/src/core/manager.c b/src/core/manager.c
index a379bbefea..69717e5ba6 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -2644,9 +2644,7 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
* We only do this for the cgroup the PID belonged to. */
(void) unit_check_oom(u1);
- /* This only logs for now. In the future when the interface for kills/notifications
- * is more stable we can extend service results table similar to how kernel oom kills
- * are managed. */
+ /* We check if systemd-oomd performed a kill so that we log and notify appropriately */
(void) unit_check_oomd_kill(u1);
manager_invoke_sigchld_event(m, u1, &si);
diff --git a/src/core/manager.h b/src/core/manager.h
index f9096cf348..c989ce9c32 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -60,9 +60,9 @@ typedef enum StatusType {
} StatusType;
typedef enum OOMPolicy {
- OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */
- OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */
- OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */
+ OOM_CONTINUE, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
+ OOM_STOP, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
+ OOM_KILL, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
_OOM_POLICY_MAX,
_OOM_POLICY_INVALID = -EINVAL,
} OOMPolicy;
diff --git a/src/core/service.c b/src/core/service.c
index 396c27956c..2d7a086852 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -3404,10 +3404,13 @@ static void service_notify_cgroup_empty_event(Unit *u) {
}
}
-static void service_notify_cgroup_oom_event(Unit *u) {
+static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
Service *s = SERVICE(u);
- log_unit_debug(u, "Process of control group was killed by the OOM killer.");
+ if (managed_oom)
+ log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
+ else
+ log_unit_debug(u, "Process of control group was killed by the OOM killer.");
if (s->oom_policy == OOM_CONTINUE)
return;
diff --git a/src/core/service.h b/src/core/service.h
index 4116e40d8f..91e02e6d7e 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -75,7 +75,7 @@ typedef enum ServiceResult {
SERVICE_FAILURE_CORE_DUMP,
SERVICE_FAILURE_WATCHDOG,
SERVICE_FAILURE_START_LIMIT_HIT,
- SERVICE_FAILURE_OOM_KILL,
+ SERVICE_FAILURE_OOM_KILL, /* OOM Kill by the Kernel or systemd-oomd */
SERVICE_SKIP_CONDITION,
_SERVICE_RESULT_MAX,
_SERVICE_RESULT_INVALID = -EINVAL,
diff --git a/src/core/unit.c b/src/core/unit.c
index 69ece07447..42fb4220f6 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3801,6 +3801,13 @@ int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) {
return UNIT_VTABLE(u)->kill(u, w, signo, error);
}
+void unit_notify_cgroup_oom(Unit *u, bool managed_oom) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->notify_cgroup_oom)
+ UNIT_VTABLE(u)->notify_cgroup_oom(u, managed_oom);
+}
+
static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
_cleanup_set_free_ Set *pid_set = NULL;
int r;
diff --git a/src/core/unit.h b/src/core/unit.h
index 94f2180951..733eeecd7f 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -285,7 +285,7 @@ typedef struct Unit {
nsec_t cpu_usage_base;
nsec_t cpu_usage_last; /* the most recently read value */
- /* The current counter of processes sent SIGKILL by systemd-oomd */
+ /* The current counter of OOM kills initiated by systemd-oomd */
uint64_t managed_oom_kill_last;
/* The current counter of the oom_kill field in the memory.events cgroup attribute */
@@ -596,7 +596,7 @@ typedef struct UnitVTable {
void (*notify_cgroup_empty)(Unit *u);
/* Called whenever an OOM kill event on this unit was seen */
- void (*notify_cgroup_oom)(Unit *u);
+ void (*notify_cgroup_oom)(Unit *u, bool managed_oom);
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
@@ -811,6 +811,8 @@ int unit_reload(Unit *u);
int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error);
int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error);
+void unit_notify_cgroup_oom(Unit *u, bool managed_oom);
+
typedef enum UnitNotifyFlags {
UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0,
UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1,
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
index 77718d9c9e..a135824c53 100644
--- a/src/oom/oomd-util.c
+++ b/src/oom/oomd-util.c
@@ -192,6 +192,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
if (!pids_killed)
return -ENOMEM;
+ r = increment_oomd_xattr(path, "user.oomd_ooms", 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m");
+
if (recurse)
r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
else