diff options
author | Lennart Poettering <lennart@poettering.net> | 2018-05-02 14:24:54 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2018-05-03 17:45:42 +0200 |
commit | 720f0a2f3c928cc9379501a52146be9fbb4d9be2 (patch) | |
tree | bc5c733e8cf132e7747ae7738f328e17e90440fb | |
parent | nspawn: let's make use of SPECIAL_MACHINE_SLICE macro, after all we already s... (diff) | |
download | systemd-720f0a2f3c928cc9379501a52146be9fbb4d9be2.tar.xz systemd-720f0a2f3c928cc9379501a52146be9fbb4d9be2.zip |
nspawn: move nspawn cgroup hierarchy one level down unconditionally
We need to do this in all cases, including on cgroupsv1 in order to
ensure the host systemd and any systemd in the payload won't fight for
the cgroup attributes of the top-level cgroup of the payload.
This is because systemd for Delegate=yes units will only delegate the
right to create children as well as their attributes. However, nspawn
expects that the cgroup delegated covers both the right to create
children and the attributes of the cgroup itself. Hence, to clear this
up, let's unconditionally insert a intermediary cgroup, on cgroupsv1 as
well as cgroupsv2, unconditionally.
This is also nice as it reduces the differences in the various setups
and exposes very close behaviour everywhere.
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 59 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.h | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 8 |
3 files changed, 38 insertions, 31 deletions
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 682ea65080..761d737dc9 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -141,44 +141,53 @@ finish: return r; } -int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { +int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) { _cleanup_free_ char *cgroup = NULL; - const char *child; - int r; CGroupMask supported; + const char *payload; + int r; - /* In the unified hierarchy inner nodes may only contain - * subgroups, but not processes. Hence, if we running in the - * unified hierarchy and the container does the same, and we - * did not create a scope unit for the container move us and - * the container into two separate subcgroups. */ - - if (unified_requested == CGROUP_UNIFIED_NONE) - return 0; - - r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return log_error_errno(r, "Failed to determine whether the systemd controller is unified: %m"); - if (r == 0) - return 0; + assert(pid > 1); + + /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in + * the unified hierarchy and the container does the same, and we did not create a scope unit for the container + * move us and the container into two separate subcgroups. + * + * Moreover, container payloads such as systemd try to manage the cgroup they run in in full (i.e. including + * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a + * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the + * host systemd directly to the payload, the host and payload systemd might fight for the cgroup + * attributes. Hence, let's insert an intermediary cgroup to cover that case too. + * + * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup + * that's fine because there's only one hiearchy anyway and controllers are enabled directly on it. On the + * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't + * do it. */ r = cg_mask_supported(&supported); if (r < 0) return log_error_errno(r, "Failed to determine supported controllers: %m"); - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + if (keep_unit) + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + else + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); if (r < 0) return log_error_errno(r, "Failed to get our control group: %m"); - child = strjoina(cgroup, "/payload"); - r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid); + payload = strjoina(cgroup, "/payload"); + r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid); if (r < 0) - return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + return log_error_errno(r, "Failed to create %s subcgroup: %m", payload); - child = strjoina(cgroup, "/supervisor"); - r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0); - if (r < 0) - return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + if (keep_unit) { + const char *supervisor; + + supervisor = strjoina(cgroup, "/supervisor"); + r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0); + if (r < 0) + return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor); + } /* Try to enable as many controllers as possible for the new payload. */ (void) cg_enable_everywhere(supported, supported, cgroup); diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h index 3a8e98e122..7639b483ae 100644 --- a/src/nspawn/nspawn-cgroup.h +++ b/src/nspawn/nspawn-cgroup.h @@ -14,4 +14,4 @@ int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); -int create_subcgroup(pid_t pid, CGroupUnified unified_requested); +int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 162f3508b9..d7ceb4ed44 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3643,11 +3643,9 @@ static int run(int master, if (r < 0) return r; - if (arg_keep_unit) { - r = create_subcgroup(*pid, arg_unified_cgroup_hierarchy); - if (r < 0) - return r; - } + r = create_subcgroup(*pid, arg_keep_unit, arg_unified_cgroup_hierarchy); + if (r < 0) + return r; r = chown_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift); if (r < 0) |