units: enable waiting for unit termination in certain cases

The legacy cgroup hierarchy does not support reliable empty notifications in containers, nor when there are left-over subgroups in a cgroup. This makes it hard to correctly wait for a cgroup to run empty, and thus we previously disabled this logic entirely. With this change we explicitly check for the container case, and whether the unit is a "delegation" unit (i.e. one where programs may create their own subgroups). If we are neither in a container nor operating on a delegation unit, cgroup empty notifications become reliable, and thus we start waiting for the empty notifications again. This doesn't really fix the general problem around cgroup notifications, but it reduces its effect. (This also reorders #include lines by their focus, as suggested in CODING_STYLE. We have to add "virt.h", so let's do that at the right place.) Also see #317.
author: Lennart Poettering <lennart@poettering.net> 2015-09-01 17:25:59 +0200
committer: Lennart Poettering <lennart@poettering.net> 2015-09-01 17:44:17 +0200
commit: e9db43d5910717a1084924c512bf85e2b8265375 (patch)
tree: 6435db76781a17d47340d509ed2cbe2d44dcf0fc
parent: unit: suppress unnecessary cgroup empty check (diff)
download: systemd-e9db43d5910717a1084924c512bf85e2b8265375.tar.xz
systemd-e9db43d5910717a1084924c512bf85e2b8265375.zip
3 files changed, 37 insertions, 18 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c26807ba2b..da6de68637 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1124,6 +1124,18 @@ int unit_reset_cpu_usage(Unit *u) {
         return 0;
 }
 
+bool unit_cgroup_delegate(Unit *u) {
+        CGroupContext *c;
+
+        assert(u);
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return false;
+
+        return c->delegate;
+}
+
 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
         [CGROUP_AUTO] = "auto",
         [CGROUP_CLOSED] = "closed",
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 869ddae8c4..7b38d210fb 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -130,5 +130,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret);
 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
 int unit_reset_cpu_usage(Unit *u);
 
+bool unit_cgroup_delegate(Unit *u);
+
 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
 CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
diff --git a/src/core/unit.c b/src/core/unit.c
index 5f602bdf5f..3fec8c4c36 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -28,27 +28,28 @@
 #include "sd-id128.h"
 #include "sd-messages.h"
 #include "set.h"
-#include "unit.h"
 #include "macro.h"
 #include "strv.h"
 #include "path-util.h"
-#include "load-fragment.h"
-#include "load-dropin.h"
 #include "log.h"
-#include "unit-name.h"
-#include "dbus-unit.h"
-#include "special.h"
 #include "cgroup-util.h"
 #include "missing.h"
 #include "mkdir.h"
 #include "fileio-label.h"
-#include "bus-common-errors.h"
-#include "dbus.h"
-#include "execute.h"
-#include "dropin.h"
 #include "formats-util.h"
 #include "process-util.h"
+#include "virt.h"
+#include "bus-common-errors.h"
 #include "bus-util.h"
+#include "dropin.h"
+#include "unit-name.h"
+#include "special.h"
+#include "unit.h"
+#include "load-fragment.h"
+#include "load-dropin.h"
+#include "dbus.h"
+#include "dbus-unit.h"
+#include "execute.h"
 
 const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
         [UNIT_SERVICE] = &service_vtable,
@@ -3594,14 +3595,18 @@ int unit_kill_context(
                 } else if (r > 0) {
 
                         /* FIXME: For now, we will not wait for the
-                         * cgroup members to die, simply because
-                         * cgroup notification is unreliable. It
-                         * doesn't work at all in containers, and
-                         * outside of containers it can be confused
-                         * easily by leaving directories in the
-                         * cgroup. */
-
-                        /* wait_for_exit = true; */
+                         * cgroup members to die if we are running in
+                         * a container or if this is a delegation
+                         * unit, simply because cgroup notification is
+                         * unreliable in these cases. It doesn't work
+                         * at all in containers, and outside of
+                         * containers it can be confused easily by
+                         * left-over directories in the cgroup --
+                         * which however should not exist in
+                         * non-delegated units. */
+
+                        if  (detect_container(NULL) == 0 && !unit_cgroup_delegate(u))
+                                wait_for_exit = true;
 
                         if (c->send_sighup && k != KILL_KILL) {
                                 set_free(pid_set);
author	Lennart Poettering <lennart@poettering.net>	2015-09-01 17:25:59 +0200
committer	Lennart Poettering <lennart@poettering.net>	2015-09-01 17:44:17 +0200
commit	e9db43d5910717a1084924c512bf85e2b8265375 (patch)
tree	6435db76781a17d47340d509ed2cbe2d44dcf0fc
parent	unit: suppress unnecessary cgroup empty check (diff)
download	systemd-e9db43d5910717a1084924c512bf85e2b8265375.tar.xz systemd-e9db43d5910717a1084924c512bf85e2b8265375.zip