summaryrefslogtreecommitdiffstats
path: root/src/core/service.c
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2019-03-19 19:05:19 +0100
committerLennart Poettering <lennart@poettering.net>2019-04-09 11:17:58 +0200
commitafcfaa695cd00b713e7d57e1829da90b692ac6f8 (patch)
tree0f0394e7d37d4fbcdcc792cb00d22a2de1df0fc3 /src/core/service.c
parentservice: beautify debug log message a bit (diff)
downloadsystemd-afcfaa695cd00b713e7d57e1829da90b692ac6f8.tar.xz
systemd-afcfaa695cd00b713e7d57e1829da90b692ac6f8.zip
core: implement OOMPolicy= and watch cgroups for OOM killings
This adds a new per-service OOMPolicy= (along with a global DefaultOOMPolicy=) that controls what to do if a process of the service is killed by the kernel's OOM killer. It has three different values: "continue" (old behaviour), "stop" (terminate the service), "kill" (let the kernel kill all the service's processes). On top of that, track OOM killer events per unit: generate a per-unit structured, recognizable log message when we see an OOM killer event, and put the service in a failure state if an OOM killer event was seen and the selected policy was not "continue". A new "result" is defined for this case: "oom-kill". All of this relies on new cgroupv2 kernel functionality: the "memory.events" notification interface and the "memory.oom.group" attribute (which makes the kernel kill all cgroup processes automatically).
Diffstat (limited to 'src/core/service.c')
-rw-r--r--src/core/service.c70
1 files changed, 68 insertions, 2 deletions
diff --git a/src/core/service.c b/src/core/service.c
index a6f6a7383f..53cead772a 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -112,6 +112,8 @@ static void service_init(Unit *u) {
EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT;
s->watchdog_original_usec = USEC_INFINITY;
+
+ s->oom_policy = _OOM_POLICY_INVALID;
}
static void service_unwatch_control_pid(Service *s) {
@@ -731,6 +733,15 @@ static int service_add_extras(Service *s) {
(s->type == SERVICE_NOTIFY || s->watchdog_usec > 0 || s->n_fd_store_max > 0))
s->notify_access = NOTIFY_MAIN;
+ /* If no OOM policy was explicitly set, then default to the configured default OOM policy. Except when
+ * delegation is on: in that case we assume the payload knows better what to do and can process
+ * things in a more focussed way. */
+ if (s->oom_policy < 0)
+ s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy;
+
+ /* Let the kernel do the killing if that's requested. */
+ s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL;
+
r = service_add_default_dependencies(s);
if (r < 0)
return r;
@@ -799,7 +810,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
"%sType: %s\n"
"%sRestart: %s\n"
"%sNotifyAccess: %s\n"
- "%sNotifyState: %s\n",
+ "%sNotifyState: %s\n"
+ "%sOOMPolicy: %s\n",
prefix, service_state_to_string(s->state),
prefix, service_result_to_string(s->result),
prefix, service_result_to_string(s->reload_result),
@@ -810,7 +822,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
prefix, service_type_to_string(s->type),
prefix, service_restart_to_string(s->restart),
prefix, notify_access_to_string(s->notify_access),
- prefix, notify_state_to_string(s->notify_state));
+ prefix, notify_state_to_string(s->notify_state),
+ prefix, oom_policy_to_string(s->oom_policy));
if (s->control_pid > 0)
fprintf(f,
@@ -3211,6 +3224,57 @@ static void service_notify_cgroup_empty_event(Unit *u) {
}
}
+static void service_notify_cgroup_oom_event(Unit *u) {
+ Service *s = SERVICE(u);
+ /* Installed as .notify_cgroup_oom in service_vtable. Logs the OOM kill, then reacts based on s->oom_policy and the current s->state. */
+ log_unit_debug(u, "Process of control group was killed by the OOM killer.");
+
+ if (s->oom_policy == OOM_CONTINUE)
+ return;
+ /* From here on the policy is not OOM_CONTINUE; every transition below passes SERVICE_FAILURE_OOM_KILL as the result. */
+ switch (s->state) {
+ /* Start phases and SERVICE_STOP: OOM_STOP calls service_enter_signal() with SERVICE_STOP_SIGTERM, OOM_KILL with SERVICE_STOP_SIGKILL. */
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_STOP:
+ if (s->oom_policy == OOM_STOP)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_OOM_KILL);
+ else if (s->oom_policy == OOM_KILL)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+
+ break;
+ /* Exited or running: OOM_STOP goes through service_enter_stop(), OOM_KILL calls service_enter_signal() with SERVICE_STOP_SIGKILL. */
+ case SERVICE_EXITED:
+ case SERVICE_RUNNING:
+ if (s->oom_policy == OOM_STOP)
+ service_enter_stop(s, SERVICE_FAILURE_OOM_KILL);
+ else if (s->oom_policy == OOM_KILL)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+
+ break;
+ /* Already in SERVICE_STOP_WATCHDOG or SERVICE_STOP_SIGTERM: request SERVICE_STOP_SIGKILL, whichever non-continue policy is set. */
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+ break;
+ /* Already in a SIGKILL state: no transition; record the OOM result only while s->result is still SERVICE_SUCCESS. */
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_FINAL_SIGKILL:
+ if (s->result == SERVICE_SUCCESS)
+ s->result = SERVICE_FAILURE_OOM_KILL;
+ break;
+ /* SERVICE_STOP_POST or SERVICE_FINAL_SIGTERM: request SERVICE_FINAL_SIGKILL. */
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+ break;
+ /* Any other state: nothing is done beyond the debug log above. */
+ default:
+ ;
+ }
+}
+
static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
bool notify_dbus = true;
Service *s = SERVICE(u);
@@ -4116,6 +4180,7 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
[SERVICE_FAILURE_CORE_DUMP] = "core-dump",
[SERVICE_FAILURE_WATCHDOG] = "watchdog",
[SERVICE_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [SERVICE_FAILURE_OOM_KILL] = "oom-kill",
};
DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
@@ -4169,6 +4234,7 @@ const UnitVTable service_vtable = {
.reset_failed = service_reset_failed,
.notify_cgroup_empty = service_notify_cgroup_empty_event,
+ .notify_cgroup_oom = service_notify_cgroup_oom_event,
.notify_message = service_notify_message,
.main_pid = service_main_pid,