diff options
author | Yu Watanabe <watanabe.yu+github@gmail.com> | 2022-03-16 12:46:49 +0100 |
---|---|---|
committer | Yu Watanabe <watanabe.yu+github@gmail.com> | 2022-03-17 12:24:38 +0100 |
commit | a1f4fd387603673a79a84ca4e5ce25b439b85fe6 (patch) | |
tree | 3a8ab14b4055ff199ee06b51d6d667f52453d095 | |
parent | Revert "udev: do not kill "udevadm control" process in the same cgroup" (diff) | |
download | systemd-a1f4fd387603673a79a84ca4e5ce25b439b85fe6.tar.xz systemd-a1f4fd387603673a79a84ca4e5ce25b439b85fe6.zip |
udev: run the main process, workers, and spawned commands in /udev subcgroup
And enable cgroup delegation for udevd.
Then, processes invoked through ExecReload= are assigned .control
subcgroup, and they are not killed by cg_kill().
Fixes #16867 and #22686.
-rw-r--r-- | src/udev/udevd.c | 76 | ||||
-rw-r--r-- | units/systemd-udevd.service.in | 1 |
2 files changed, 59 insertions, 18 deletions
diff --git a/src/udev/udevd.c b/src/udev/udevd.c index 8380d674c5..c6f6d945c8 100644 --- a/src/udev/udevd.c +++ b/src/udev/udevd.c @@ -28,6 +28,7 @@ #include "sd-event.h" #include "alloc-util.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "cpu-set-util.h" #include "dev-setup.h" @@ -48,6 +49,7 @@ #include "mkdir.h" #include "netlink-util.h" #include "parse-util.h" +#include "path-util.h" #include "pretty-print.h" #include "proc-cmdline.h" #include "process-util.h" @@ -85,7 +87,7 @@ typedef struct Manager { sd_event *event; Hashmap *workers; LIST_HEAD(Event, events); - const char *cgroup; + char *cgroup; pid_t pid; /* the process that originally allocated the manager object */ int log_level; @@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) { safe_close(manager->inotify_fd); safe_close_pair(manager->worker_watch); + free(manager->cgroup); return mfree(manager); } @@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) { return 1; } -static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) { +static int create_subcgroup(char **ret) { + _cleanup_free_ char *cgroup = NULL, *subcgroup = NULL; + int r; + + if (getppid() != 1) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1."); + + r = sd_booted(); + if (r < 0) + return log_debug_errno(r, "Failed to check if systemd is running: %m"); + if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running."); + + /* Get our own cgroup, we regularly kill everything udev has left behind. + * We only do this on systemd systems, and only if we are directly spawned + * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */ + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + if (r < 0) { + if (IN_SET(r, -ENOENT, -ENOMEDIUM)) + return log_debug_errno(r, "Dedicated cgroup not found: %m"); + return log_debug_errno(r, "Failed to get cgroup: %m"); + } + + r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate"); + if (IN_SET(r, 0, -ENODATA)) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup); + if (r < 0) + return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m"); + + /* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we + * don't collide with the "no processes in inner nodes" rule of cgroups, when the service + * manager invokes the ExecReload= job in the .control/ subcgroup. */ + + subcgroup = path_join(cgroup, "/udev"); + if (!subcgroup) + return log_oom_debug(); + + r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0); + if (r < 0) + return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup); + + log_debug("Created %s subcgroup.", subcgroup); + if (ret) + *ret = TAKE_PTR(subcgroup); + return 0; +} + +static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) { _cleanup_(manager_freep) Manager *manager = NULL; + _cleanup_free_ char *cgroup = NULL; int r; assert(ret); + (void) create_subcgroup(&cgroup); + manager = new(Manager, 1); if (!manager) return log_oom(); @@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg *manager = (Manager) { .inotify_fd = -1, .worker_watch = { -1, -1 }, - .cgroup = cgroup, + .cgroup = TAKE_PTR(cgroup), }; r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl); @@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) { } int run_udevd(int argc, char *argv[]) { - _cleanup_free_ char *cgroup = NULL; _cleanup_(manager_freep) Manager *manager = NULL; int fd_ctrl = -1, fd_uevent = -1; int r; @@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) { if (r < 0 && r != -EEXIST) return log_error_errno(r, "Failed to create /run/udev: %m"); - if (getppid() == 1 && sd_booted() > 0) { - /* Get our own cgroup, we regularly kill everything udev has left behind. - * We only do this on systemd systems, and only if we are directly spawned - * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */ - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); - if (r < 0) { - if (IN_SET(r, -ENOENT, -ENOMEDIUM)) - log_debug_errno(r, "Dedicated cgroup not found: %m"); - else - log_warning_errno(r, "Failed to get cgroup: %m"); - } - } - r = listen_fds(&fd_ctrl, &fd_uevent); if (r < 0) return log_error_errno(r, "Failed to listen on fds: %m"); - r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup); + r = manager_new(&manager, fd_ctrl, fd_uevent); if (r < 0) return log_error_errno(r, "Failed to create manager: %m"); diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in index d042bfb0d3..9901198274 100644 --- a/units/systemd-udevd.service.in +++ b/units/systemd-udevd.service.in @@ -16,6 +16,7 @@ Before=sysinit.target ConditionPathIsReadWrite=/sys [Service] +Delegate=pids DeviceAllow=block-* rwm DeviceAllow=char-* rwm Type=notify |