summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYu Watanabe <watanabe.yu+github@gmail.com>2022-03-16 12:46:49 +0100
committerYu Watanabe <watanabe.yu+github@gmail.com>2022-03-17 12:24:38 +0100
commita1f4fd387603673a79a84ca4e5ce25b439b85fe6 (patch)
tree3a8ab14b4055ff199ee06b51d6d667f52453d095
parentRevert "udev: do not kill "udevadm control" process in the same cgroup" (diff)
downloadsystemd-a1f4fd387603673a79a84ca4e5ce25b439b85fe6.tar.xz
systemd-a1f4fd387603673a79a84ca4e5ce25b439b85fe6.zip
udev: run the main process, workers, and spawned commands in /udev subcgroup
And enable cgroup delegation for udevd. Then, processes invoked through ExecReload= are assigned .control subcgroup, and they are not killed by cg_kill(). Fixes #16867 and #22686.
-rw-r--r--src/udev/udevd.c76
-rw-r--r--units/systemd-udevd.service.in1
2 files changed, 59 insertions, 18 deletions
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 8380d674c5..c6f6d945c8 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -28,6 +28,7 @@
#include "sd-event.h"
#include "alloc-util.h"
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "dev-setup.h"
@@ -48,6 +49,7 @@
#include "mkdir.h"
#include "netlink-util.h"
#include "parse-util.h"
+#include "path-util.h"
#include "pretty-print.h"
#include "proc-cmdline.h"
#include "process-util.h"
@@ -85,7 +87,7 @@ typedef struct Manager {
sd_event *event;
Hashmap *workers;
LIST_HEAD(Event, events);
- const char *cgroup;
+ char *cgroup;
pid_t pid; /* the process that originally allocated the manager object */
int log_level;
@@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
safe_close(manager->inotify_fd);
safe_close_pair(manager->worker_watch);
+ free(manager->cgroup);
return mfree(manager);
}
@@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) {
return 1;
}
-static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
+static int create_subcgroup(char **ret) {
+ _cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
+ int r;
+
+ if (getppid() != 1)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
+
+ r = sd_booted();
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check if systemd is running: %m");
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
+
+ /* Get our own cgroup, we regularly kill everything udev has left behind.
+ * We only do this on systemd systems, and only if we are directly spawned
+ * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ if (r < 0) {
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ return log_debug_errno(r, "Dedicated cgroup not found: %m");
+ return log_debug_errno(r, "Failed to get cgroup: %m");
+ }
+
+ r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
+ if (IN_SET(r, 0, -ENODATA))
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
+
+ /* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
+ * don't collide with the "no processes in inner nodes" rule of cgroups, when the service
+ * manager invokes the ExecReload= job in the .control/ subcgroup. */
+
+ subcgroup = path_join(cgroup, "/udev");
+ if (!subcgroup)
+ return log_oom_debug();
+
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
+
+ log_debug("Created %s subcgroup.", subcgroup);
+ if (ret)
+ *ret = TAKE_PTR(subcgroup);
+ return 0;
+}
+
+static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
_cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_free_ char *cgroup = NULL;
int r;
assert(ret);
+ (void) create_subcgroup(&cgroup);
+
manager = new(Manager, 1);
if (!manager)
return log_oom();
@@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg
*manager = (Manager) {
.inotify_fd = -1,
.worker_watch = { -1, -1 },
- .cgroup = cgroup,
+ .cgroup = TAKE_PTR(cgroup),
};
r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
@@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
}
int run_udevd(int argc, char *argv[]) {
- _cleanup_free_ char *cgroup = NULL;
_cleanup_(manager_freep) Manager *manager = NULL;
int fd_ctrl = -1, fd_uevent = -1;
int r;
@@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
if (r < 0 && r != -EEXIST)
return log_error_errno(r, "Failed to create /run/udev: %m");
- if (getppid() == 1 && sd_booted() > 0) {
- /* Get our own cgroup, we regularly kill everything udev has left behind.
- * We only do this on systemd systems, and only if we are directly spawned
- * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
- if (r < 0) {
- if (IN_SET(r, -ENOENT, -ENOMEDIUM))
- log_debug_errno(r, "Dedicated cgroup not found: %m");
- else
- log_warning_errno(r, "Failed to get cgroup: %m");
- }
- }
-
r = listen_fds(&fd_ctrl, &fd_uevent);
if (r < 0)
return log_error_errno(r, "Failed to listen on fds: %m");
- r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
+ r = manager_new(&manager, fd_ctrl, fd_uevent);
if (r < 0)
return log_error_errno(r, "Failed to create manager: %m");
diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in
index d042bfb0d3..9901198274 100644
--- a/units/systemd-udevd.service.in
+++ b/units/systemd-udevd.service.in
@@ -16,6 +16,7 @@ Before=sysinit.target
ConditionPathIsReadWrite=/sys
[Service]
+Delegate=pids
DeviceAllow=block-* rwm
DeviceAllow=char-* rwm
Type=notify