diff options
author | Lennart Poettering <lennart@poettering.net> | 2019-03-19 19:05:19 +0100 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2019-04-09 11:17:58 +0200 |
commit | afcfaa695cd00b713e7d57e1829da90b692ac6f8 (patch) | |
tree | 0f0394e7d37d4fbcdcc792cb00d22a2de1df0fc3 /src/core/service.c | |
parent | service: beautify debug log message a bit (diff) | |
download | systemd-afcfaa695cd00b713e7d57e1829da90b692ac6f8.tar.xz systemd-afcfaa695cd00b713e7d57e1829da90b692ac6f8.zip |
core: implement OOMPolicy= and watch cgroups for OOM killings
This adds a new per-service OOMPolicy= (along with a global
DefaultOOMPolicy=) that controls what to do if a process of the service
is killed by the kernel's OOM killer. It has three different values:
"continue" (old behaviour), "stop" (terminate the service), "kill" (let
the kernel kill all the service's processes).
On top of that, track OOM killer events per unit: generate a per-unit
structured, recognizable log message when we see an OOM killer event,
and put the service in a failure state if an OOM killer event was seen
and the selected policy was not "continue". A new "result" is defined
for this case: "oom-kill".
All of this relies on new cgroupv2 kernel functionality: the
"memory.events" notification interface and the "memory.oom.group"
attribute (which makes the kernel kill all cgroup processes
automatically).
Diffstat (limited to 'src/core/service.c')
-rw-r--r-- | src/core/service.c | 70 |
1 files changed, 68 insertions, 2 deletions
diff --git a/src/core/service.c b/src/core/service.c index a6f6a7383f..53cead772a 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -112,6 +112,8 @@ static void service_init(Unit *u) { EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT; s->watchdog_original_usec = USEC_INFINITY; + + s->oom_policy = _OOM_POLICY_INVALID; } static void service_unwatch_control_pid(Service *s) { @@ -731,6 +733,15 @@ static int service_add_extras(Service *s) { (s->type == SERVICE_NOTIFY || s->watchdog_usec > 0 || s->n_fd_store_max > 0)) s->notify_access = NOTIFY_MAIN; + /* If no OOM policy was explicitly set, then default to the configure default OOM policy. Except when + * delegation is on, in that case it we assume the payload knows better what to do and can process + * things in a more focussed way. */ + if (s->oom_policy < 0) + s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy; + + /* Let the kernel do the killing if that's requested. */ + s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL; + r = service_add_default_dependencies(s); if (r < 0) return r; @@ -799,7 +810,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { "%sType: %s\n" "%sRestart: %s\n" "%sNotifyAccess: %s\n" - "%sNotifyState: %s\n", + "%sNotifyState: %s\n" + "%sOOMPolicy: %s\n", prefix, service_state_to_string(s->state), prefix, service_result_to_string(s->result), prefix, service_result_to_string(s->reload_result), @@ -810,7 +822,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, service_type_to_string(s->type), prefix, service_restart_to_string(s->restart), prefix, notify_access_to_string(s->notify_access), - prefix, notify_state_to_string(s->notify_state)); + prefix, notify_state_to_string(s->notify_state), + prefix, oom_policy_to_string(s->oom_policy)); if (s->control_pid > 0) fprintf(f, @@ -3211,6 +3224,57 @@ static void service_notify_cgroup_empty_event(Unit *u) { } } +static void service_notify_cgroup_oom_event(Unit *u) { + Service *s = SERVICE(u); + + log_unit_debug(u, "Process of control group was killed by the OOM killer."); + + if (s->oom_policy == OOM_CONTINUE) + return; + + switch (s->state) { + + case SERVICE_START_PRE: + case SERVICE_START: + case SERVICE_START_POST: + case SERVICE_STOP: + if (s->oom_policy == OOM_STOP) + service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_OOM_KILL); + else if (s->oom_policy == OOM_KILL) + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + + break; + + case SERVICE_EXITED: + case SERVICE_RUNNING: + if (s->oom_policy == OOM_STOP) + service_enter_stop(s, SERVICE_FAILURE_OOM_KILL); + else if (s->oom_policy == OOM_KILL) + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + + break; + + case SERVICE_STOP_WATCHDOG: + case SERVICE_STOP_SIGTERM: + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + break; + + case SERVICE_STOP_SIGKILL: + case SERVICE_FINAL_SIGKILL: + if (s->result == SERVICE_SUCCESS) + s->result = SERVICE_FAILURE_OOM_KILL; + break; + + case SERVICE_STOP_POST: + case SERVICE_FINAL_SIGTERM: + service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_OOM_KILL); + break; + + default: + ; + } +} + static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { bool notify_dbus = true; Service *s = SERVICE(u); @@ -4116,6 +4180,7 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = { [SERVICE_FAILURE_CORE_DUMP] = "core-dump", [SERVICE_FAILURE_WATCHDOG] = "watchdog", [SERVICE_FAILURE_START_LIMIT_HIT] = "start-limit-hit", + [SERVICE_FAILURE_OOM_KILL] = "oom-kill", }; DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult); @@ -4169,6 +4234,7 @@ const UnitVTable service_vtable = { .reset_failed = service_reset_failed, .notify_cgroup_empty = service_notify_cgroup_empty_event, + .notify_cgroup_oom = service_notify_cgroup_oom_event, .notify_message = service_notify_message, .main_pid = service_main_pid, |